# Get data
# Load the cached EXTDRG library table when it exists; otherwise build it from
# the raw screen scores, map identifiers, and cache the result as CSV.
if (file.exists(path_extdrg)) {
  extdrg <- read_csv(path_extdrg)
} else {
  # `treatment` is split at its last underscore into a gene and an siRNA part.
  df <- read_csv(src_extdrg_prihits) %>%
    transmute(
      gene = sub("^(.*)_(.*?)$", "\\1", treatment),
      sirna = sub("^(.*)_(.*?)$", "\\2", treatment),
      score = median
    ) %>%
    # Drop Scrambled / vx809 / empty treatments (presumably controls)
    filter(!grepl("Scrambled", gene)) %>%
    filter(!grepl("vx809", gene)) %>%
    filter(!grepl("empty", gene)) %>%
    # One row per gene: mean score and number of contributing rows
    group_by(gene) %>%
    summarise(
      score = mean(score),
      nsirna = n()
    )
  gene0 <- df$gene # Get gene symbols
  up <- genesymbol_to_uniprot(gene0) # Convert gene symbols to uniprot
  gene_na <- gene0[which(is.na(up))] # Gene symbols for which a Uniprot ID could not be found
  # Manually add missing uniprot ids. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `up` into a list).
  up[is.na(up)] <- extraids[["uniprot"]][match(gene_na, extraids$symbol)]
  # Re-derive every identifier from the Uniprot ID
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  extdrg <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname,
    score = df$score,
    nsirna = df$nsirna
  )
  write.csv(extdrg, path_extdrg, row.names = FALSE)
}
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
extdrgF <- extdrg %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Library statistics (mapped genes): row counts and summed siRNA counts,
# before (extdrg) and after (extdrgF) filtering
n_extdrg <- nrow(extdrg)
n_extdrg_siRNAs <- sum(extdrg$nsirna)
n_extdrgF <- nrow(extdrgF)
n_extdrgF_siRNAs <- sum(extdrgF$nsirna)
All data from EXTDRG library:
After name updating:
# Get data
# Load the cached EXTDRG primary-hits table when it exists; otherwise build it
# from the raw screen scores (rows with score >= 2), map identifiers, and cache
# the result as CSV.
if (file.exists(path_extdrg_prihits)) {
  extdrg_prihits <- read_csv(path_extdrg_prihits)
} else {
  # `treatment` is split at its last underscore into a gene and an siRNA part.
  df <- read_csv(src_extdrg_prihits) %>%
    transmute(
      gene = sub("^(.*)_(.*?)$", "\\1", treatment),
      sirna = sub("^(.*)_(.*?)$", "\\2", treatment),
      score = median
    ) %>%
    # Drop Scrambled / vx809 / empty treatments (presumably controls)
    filter(!grepl("Scrambled", gene)) %>%
    filter(!grepl("vx809", gene)) %>%
    filter(!grepl("empty", gene)) %>%
    # Keep only rows reaching the score threshold before averaging per gene
    filter(score >= 2) %>%
    group_by(gene) %>%
    summarise(
      score = mean(score),
      nsirna = n()
    ) %>%
    arrange(desc(score))
  gene0 <- df$gene # Get gene symbols
  up <- genesymbol_to_uniprot(gene0) # Convert gene symbols to uniprot
  gene_na <- gene0[which(is.na(up))] # Gene symbols for which a Uniprot ID could not be found
  # Manually add missing uniprot ids. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `up` into a list).
  up[is.na(up)] <- extraids[["uniprot"]][match(gene_na, extraids$symbol)]
  # Re-derive every identifier from the Uniprot ID
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  extdrg_prihits <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname,
    score = df$score,
    nsirna = df$nsirna
  )
  write.csv(extdrg_prihits, path_extdrg_prihits, row.names = FALSE)
}
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
extdrg_prihitsF <- extdrg_prihits %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Library statistics (mapped genes): row counts and summed siRNA counts,
# before and after filtering
n_extdrg_prihits <- nrow(extdrg_prihits)
n_extdrg_prihits_siRNAs <- sum(extdrg_prihits$nsirna)
n_extdrg_prihitsF <- nrow(extdrg_prihitsF)
n_extdrg_prihitsF_siRNAs <- sum(extdrg_prihitsF$nsirna)
All data from EXTDRG primary hits:
After name updating:
# Get data
# Load the cached EXTDRG confirmed-hits table when it exists; otherwise build
# it from the confirmation screen file, map identifiers from the Uniprot IDs
# supplied in that file, and cache the result as CSV.
if (file.exists(path_extdrg_cnfhits)) {
  extdrg_cnfhits <- read_csv(path_extdrg_cnfhits)
} else {
  # `treatment` is split at its last underscore into a gene and an siRNA part;
  # the F508del column is used as the score.
  df <- read_csv(src_extdrg_cnfhits) %>%
    mutate(
      gene = sub("^(.*)_(.*?)$", "\\1", treatment),
      sirna = sub("^(.*)_(.*?)$", "\\2", treatment),
      score = F508del
    ) %>%
    # Drop treatments starting with Scrambled / Neg and rows flagged Control
    filter(!grepl("^Scrambled", gene)) %>%
    filter(!grepl("^Neg", gene)) %>%
    filter(!Control) %>%
    # Keep rows flagged as hits, with a known score of at least 1
    filter(hit_newscore227) %>%
    filter(!is.na(score)) %>%
    select(gene, sirna, score, Uniprot) %>%
    filter(score >= 1) %>%
    group_by(gene, Uniprot) %>%
    # `.groups = "drop"` makes the result explicitly ungrouped (and silences
    # the regrouping message); the sort below ignores grouping either way.
    summarise(
      score = mean(score),
      nsirna = n(),
      .groups = "drop"
    ) %>%
    arrange(desc(score))
  up <- df$Uniprot
  # Derive every identifier from the Uniprot ID
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  extdrg_cnfhits <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname,
    score = df$score,
    nsirna = df$nsirna
  )
  write.csv(extdrg_cnfhits, path_extdrg_cnfhits, row.names = FALSE)
}
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
extdrg_cnfhitsF <- extdrg_cnfhits %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Library statistics (mapped genes): row counts and summed siRNA counts,
# before and after filtering
n_extdrg_cnfhits <- nrow(extdrg_cnfhits)
n_extdrg_cnfhits_siRNAs <- sum(extdrg_cnfhits$nsirna)
n_extdrg_cnfhitsF <- nrow(extdrg_cnfhitsF)
n_extdrg_cnfhitsF_siRNAs <- sum(extdrg_cnfhitsF$nsirna)
All data from EXTDRG confirmed hits:
After name updating:
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ACSBG2 | Q5FVE4 | Long-chain-fatty-acid–CoA ligase ACSBG2 |
| 2 | ADAMTS6 | Q9UKP5 | A disintegrin and metalloproteinase with thrombospondin motifs 6 |
| 3 | APOB | P04114 | Apolipoprotein B-100 |
| 4 | ARVCF | O00192 | Armadillo repeat protein deleted in velo-cardio-facial syndrome |
| 5 | CCL27 | Q9Y4X3 | C-C motif chemokine 27 |
| 6 | CITED2 | Q99967 | Cbp/p300-interacting transactivator 2 |
| 7 | CLDN4 | O14493 | Claudin-4 |
| 8 | COL5A1 | P20908 | Collagen alpha-1(V) chain |
| 9 | CPZ | Q66K79 | Carboxypeptidase Z |
| 10 | CREBBP | Q92793 | CREB-binding protein |
| 11 | CYSLTR2 | Q9NS75 | Cysteinyl leukotriene receptor 2 |
| 12 | DCSTAMP | Q9H295 | Dendritic cell-specific transmembrane protein |
| 13 | EVI5L | Q96CN4 | EVI5-like protein |
| 14 | FOLR2 | P14207 | Folate receptor beta |
| 15 | GJB2 | P29033 | Gap junction beta-2 protein |
| 16 | GUCY2D | Q02846 | Retinal guanylyl cyclase 1 |
| 17 | HELLS | Q9NRZ9 | Lymphoid-specific helicase |
| 18 | IL24 | Q13007 | Interleukin-24 |
| 19 | ISL2 | Q96A47 | Insulin gene enhancer protein ISL-2 |
| 20 | KIF17 | Q9P2E2 | Kinesin-like protein KIF17 |
| 21 | LDLRAD3 | Q86YD5 | Low-density lipoprotein receptor class A domain-containing protein 3 |
| 22 | LIN9 | Q5TKA1 | Protein lin-9 homolog |
| 23 | LRRK1 | Q38SD2 | Leucine-rich repeat serine/threonine-protein kinase 1 |
| 24 | NOVA1 | P51513 | RNA-binding protein Nova-1 |
| 25 | NTNG2 | Q96CW9 | Netrin-G2 |
| 26 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
| 27 | PCDHB2 | Q9Y5E7 | Protocadherin beta-2 |
| 28 | QRSL1 | Q9H0R6 | Glutamyl-tRNA(Gln) amidotransferase subunit A, mitochondrial |
| 29 | RECQL5 | O94762 | ATP-dependent DNA helicase Q5 |
| 30 | SLC30A1 | Q9Y6M5 | Zinc transporter 1 |
| 31 | STYK1 | Q6J9G0 | Tyrosine-protein kinase STYK1 |
| 32 | TPK1 | Q9H3S4 | Thiamin pyrophosphokinase 1 |
| 33 | VPS26A | O75436 | Vacuolar protein sorting-associated protein 26A |
| 34 | ZNF141 | Q15928 | Zinc finger protein 141 |
| 35 | ZNF384 | Q8TF68 | Zinc finger protein 384 |
Nature (2015) 528, 510–516
deltaF508 CFTR interactome remodelling promotes rescue of cystic
fibrosis
Sandra Pankow, Casimir Bamberger, Diego Calzolari, Salvador
Martínez-Bartolomé, Mathieu Lavallée-Adam, William E. Balch & John
R. Yates III
https://doi.org/10.1038/nature15729
Immunoprecipitation-based proteomic-profiling of isogenic HBE41o- WT- and F508del-CFTR cells. Besides wt- and F508del-CFTR interactors (direct and indirect) a core CFTR interactome is defined: proteins which interact with at least one CFTR variant.
The authors develop and apply a co-purifying protein identification technology (CoPIT), an IP-based proteomic-profiling approach of protein-protein interactions. They first determined the changes that occur between the WT- and F508del-CFTR interactome in HBE41o- and CFBE41o- bronchial epithelial cell lines. They also study interactome changes upon temperature shift (to 30ºC for 1h, 6h and 24h) and HDACi treatment in CFBE cells.
The most important outcome of this study are the interactomes of WT-CFTR (Table S2) and F508del-CFTR (Table S3) which comprise 395 and 526 proteins, respectively, with an overlap of more than 85% or 638 proteins (Table S1).
# Core CFTR interactome
# Get data
# Load the cached core-interactome table when it exists; otherwise download the
# spreadsheet, map identifiers from its Uniprot_Entry column, and cache as CSV.
if (file.exists(path_pankow2015_core)) {
  pankow2015_core <- read_csv(path_pankow2015_core)
} else {
  tmp <- tempfile(fileext = ".xlsx")
  download.file(url = src_pankow2015_core, destfile = tmp, mode = "wb")
  df <- tibble(read.xlsx(tmp, sheet = 1))
  up <- df$Uniprot_Entry
  gene <- uniprot_to_genesymbol(up) # Get gene symbols
  up_na <- up[which(is.na(gene))] # Gene uniprot IDs for which a gene symbol could not be found
  # Add missing gene symbols. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `gene` into a list).
  gene[is.na(gene)] <- extraids[["symbol"]][match(up_na, extraids$uniprot)]
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  pankow2015_core <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(pankow2015_core, path_pankow2015_core, row.names = FALSE)
}
# Library statistics (mapped genes)
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
pankow2015_coreF <- pankow2015_core %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Row counts before and after filtering
n_pankow2015_core <- nrow(pankow2015_core)
n_pankow2015_coreF <- nrow(pankow2015_coreF)
# wt-CFTR interactome
# Get data
# Load the cached wt-CFTR interactome table when it exists; otherwise download
# the spreadsheet, select the core-interactome entries whose symbol appears in
# sheet 2, map identifiers, and cache as CSV. Requires `pankow2015_core`.
if (file.exists(path_pankow2015_wt)) {
  pankow2015_wt <- read_csv(path_pankow2015_wt)
} else {
  tmp <- tempfile(fileext = ".xlsx")
  download.file(url = src_pankow2015_wt, destfile = tmp, mode = "wb")
  # gene symbols of wt-CFTR interactors (CFTR itself excluded)
  df <- tmp %>%
    read.xlsx(sheet = 2) %>%
    dplyr::select(GeneSymbol, Protein1_Genemania_ID) %>%
    filter(!is.na(GeneSymbol)) %>%
    filter(GeneSymbol != "CFTR")
  # Uniprot IDs are taken from the core table, matched by gene symbol
  up <- pankow2015_core %>%
    filter(symbol %in% df$GeneSymbol) %>%
    pull(uniprot)
  gene <- uniprot_to_genesymbol(up) # Get gene symbols
  up_na <- up[which(is.na(gene))] # Gene uniprot IDs for which a gene symbol could not be found
  # Add missing gene symbols. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `gene` into a list).
  gene[is.na(gene)] <- extraids[["symbol"]][match(up_na, extraids$uniprot)]
  id <- uniprot_to_geneid(up)
  ensg <- uniprot_to_ensembl(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  pankow2015_wt <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(pankow2015_wt, path_pankow2015_wt, row.names = FALSE)
}
# Library statistics (mapped genes)
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
pankow2015_wtF <- pankow2015_wt %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Row counts before and after filtering
n_pankow2015_wt <- nrow(pankow2015_wt)
n_pankow2015_wtF <- nrow(pankow2015_wtF)
# F508del-CFTR interactome
# Get data
# Load the cached F508del-CFTR interactome table when it exists; otherwise
# download the spreadsheet, select the core-interactome entries whose symbol
# appears in sheet 3, map identifiers, and cache as CSV.
# Requires `pankow2015_core`.
if (file.exists(path_pankow2015_df)) {
  pankow2015_df <- read_csv(path_pankow2015_df)
} else {
  tmp <- tempfile(fileext = ".xlsx")
  download.file(url = src_pankow2015_df, destfile = tmp, mode = "wb")
  # gene symbols of F508del-CFTR interactors (CFTR itself excluded)
  df <- tmp %>%
    read.xlsx(sheet = 3) %>%
    dplyr::select(GeneSymbol, Protein1_Genemania_ID) %>%
    filter(!is.na(GeneSymbol)) %>%
    filter(GeneSymbol != "CFTR")
  # Uniprot IDs are taken from the core table, matched by gene symbol
  up <- pankow2015_core %>%
    filter(symbol %in% df$GeneSymbol) %>%
    pull(uniprot)
  gene <- uniprot_to_genesymbol(up) # Get gene symbols
  up_na <- up[which(is.na(gene))] # Gene uniprot IDs for which a gene symbol could not be found
  # Add missing gene symbols. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `gene` into a list).
  gene[is.na(gene)] <- extraids[["symbol"]][match(up_na, extraids$uniprot)]
  id <- uniprot_to_geneid(up)
  ensg <- uniprot_to_ensembl(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  pankow2015_df <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(pankow2015_df, path_pankow2015_df, row.names = FALSE)
}
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
pankow2015_dfF <- pankow2015_df %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Library statistics (mapped genes): row counts before and after filtering
n_pankow2015_df <- nrow(pankow2015_df)
n_pankow2015_dfF <- nrow(pankow2015_dfF)
Core CFTR interactome:
* All data: 638 genes.
* After name updating: 637 genes.
wt-CFTR interactome:
* All data: 364 genes.
* After name updating: 363 genes.
F508del-CFTR interactome:
* All data: 481 genes.
* After name updating: 480 genes.
Primary traffic hits & Pankow2015 (core)
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | BAG3 | O95817 | BAG family molecular chaperone regulator 3 |
| 2 | CCT5 | P48643 | T-complex protein 1 subunit epsilon |
| 3 | MMS19 | Q96T76 | MMS19 nucleotide excision repair protein homolog |
| 4 | MYH14 | Q7Z406 | Myosin-14 |
| 5 | NCBP2 | P52298 | Nuclear cap-binding protein subunit 2 |
| 6 | PABPC3 | Q9H361 | Polyadenylate-binding protein 3 |
| 7 | PSMA6 | P60900 | Proteasome subunit alpha type-6 |
| 8 | PSMB1 | P20618 | Proteasome subunit beta type-1 |
| 9 | UQCRC1 | P31930 | Cytochrome b-c1 complex subunit 1, mitochondrial |
Primary traffic hits & Pankow2015 (F508del)
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | BAG3 | O95817 | BAG family molecular chaperone regulator 3 |
| 2 | CCT5 | P48643 | T-complex protein 1 subunit epsilon |
| 3 | MMS19 | Q96T76 | MMS19 nucleotide excision repair protein homolog |
| 4 | MYH14 | Q7Z406 | Myosin-14 |
| 5 | NCBP2 | P52298 | Nuclear cap-binding protein subunit 2 |
| 6 | PABPC3 | Q9H361 | Polyadenylate-binding protein 3 |
| 7 | PSMA6 | P60900 | Proteasome subunit alpha type-6 |
| 8 | PSMB1 | P20618 | Proteasome subunit beta type-1 |
| 9 | UQCRC1 | P31930 | Cytochrome b-c1 complex subunit 1, mitochondrial |
Cell Mol Life Sci (2018) 75(24):4495-4509
Proteomic interaction profiling reveals KIFC1 as a factor
involved in early targeting of F508del-CFTR to
degradation
Sara Canato, João D Santos, Ana S Carvalho, Kerman Aloria, Margarida D
Amaral, Rune Matthiesen, André O Falcao, Carlos M Farinha
https://doi.org/10.1007/s00018-018-2896-7
Total interactome identified for F508del-CFTR and F508del-4RK-CFTR. Protein complexes were isolated by pulling-down CFTR from CFBE cells expressing these two variants.
The authors used a proteomic profiling approach coupled to global bioinformatic analysis to investigate the differential protein-protein interactions (PPIs) of F508del-CFTR vs F508del-4RK-CFTR (which allows F508del-CFTR to escape the ERQC and consequently to reach the plasma membrane (PM) without, however, a significant correction in its folding). This approach resulted in the mapping of the comparative interactome of F508del-CFTR and F508del-4RK-CFTR to identify biological pathways dictating ER retention and degradation of F508del-CFTR and novel putative therapeutic targets in CF.
An important outcome of this study is a F508del-CFTR interactome from CFBE cells expressing this variant, which can be found in Table S1 and comprises 828 proteins. An interactome for F508del-4RK-CFTR is also provided. Furthermore, they compare their F508del-CFTR interactome with the “core CFTR interactome” found by Pankow and collaborators and find 217 common proteins corresponding to 26% of their complete list of interactors. This suggests that this work by Canato et al has additional interactors which might be of interest.
# F508del-CFTR interactome
# Get data
# Load the cached F508del-CFTR interactome table when it exists; otherwise
# download the spreadsheet, keep proteins with a positive summed signal in the
# "DF" columns, map identifiers, and cache as CSV.
if (file.exists(path_canato2018_df)) {
  canato2018_df <- read_csv(path_canato2018_df)
} else {
  # Explicit .xlsx extension, consistent with the other spreadsheet downloads
  tmp <- tempfile(fileext = ".xlsx")
  download.file(url = src_canato2018, destfile = tmp, mode = "wb")
  df <- tmp %>%
    read.xlsx(cols = c(4:6, 10)) %>%
    tibble() %>%
    mutate(across(starts_with("DF"), as.numeric)) %>%
    # `Head` looks like "<db>|<uniprot>|<name>_<suffix>": the 2nd field is the
    # Uniprot ID, the part of the 3rd field before the underscore the gene name
    mutate(
      up0 = sub("^.*?\\|(.*?)\\|(.*?)_.*$", "\\1", Head),
      gn0 = sub("^.*?\\|(.*?)\\|(.*?)_.*$", "\\2", Head),
      sum = rowSums(across(starts_with("DF")))
    ) %>%
    filter(sum > 0)
  up <- df$up0
  gene <- uniprot_to_genesymbol(up)
  up_na <- up[which(is.na(gene))] # Uniprot IDs for which a gene symbol could not be found
  # Add missing gene symbols. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `gene` into a list).
  gene[is.na(gene)] <- extraids[["symbol"]][match(up_na, extraids$uniprot)]
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  canato2018_df <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(canato2018_df, path_canato2018_df, row.names = FALSE)
}
# Library statistics (mapped genes)
# Uniprot IDs of this dataset that `extraids` flags as not in the database
bad_up <- extraids %>%
  filter(!is.na(uniprot) & !in_database & uniprot %in% canato2018_df$uniprot) %>%
  pull(uniprot) # Deprecated Uniprot IDs
# Filtered table: first row per Uniprot ID, without unknown or flagged IDs
canato2018_dfF <- canato2018_df %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) %>% # Remove unknowns
  filter(!uniprot %in% bad_up) # Uniprots not in the database
# Row counts before and after filtering
n_canato2018_df <- nrow(canato2018_df)
n_canato2018_dfF <- nrow(canato2018_dfF)
# 4RK-CFTR interactome
# Get data
# Load the cached 4RK-CFTR interactome table when it exists; otherwise download
# the spreadsheet, keep proteins with a positive summed signal in the "4RK"
# columns, map identifiers, and cache as CSV.
if (file.exists(path_canato2018_4RK)) {
  canato2018_4RK <- read_csv(path_canato2018_4RK)
} else {
  # Explicit .xlsx extension, consistent with the other spreadsheet downloads
  tmp <- tempfile(fileext = ".xlsx")
  download.file(url = src_canato2018, destfile = tmp, mode = "wb")
  df <- tmp %>%
    read.xlsx(cols = c(1:3, 10)) %>%
    tibble() %>%
    mutate(across(starts_with("4RK"), as.numeric)) %>%
    # `Head` looks like "<db>|<uniprot>|<name>_<suffix>": the 2nd field is the
    # Uniprot ID, the part of the 3rd field before the underscore the gene name
    mutate(
      up0 = sub("^.*?\\|(.*?)\\|(.*?)_.*$", "\\1", Head),
      gn0 = sub("^.*?\\|(.*?)\\|(.*?)_.*$", "\\2", Head),
      sum = rowSums(across(starts_with("4RK")))
    ) %>%
    filter(sum > 0)
  up <- df$up0
  gene <- uniprot_to_genesymbol(up)
  up_na <- up[which(is.na(gene))] # Uniprot IDs for which a gene symbol could not be found
  # Add missing gene symbols. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `gene` into a list).
  gene[is.na(gene)] <- extraids[["symbol"]][match(up_na, extraids$uniprot)]
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  canato2018_4RK <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(canato2018_4RK, path_canato2018_4RK, row.names = FALSE)
}
# Library statistics (mapped genes)
# Uniprot IDs of this dataset that `extraids` flags as not in the database
bad_up <- extraids %>%
  filter(!is.na(uniprot) & !in_database & uniprot %in% canato2018_4RK$uniprot) %>%
  pull(uniprot) # Deprecated Uniprot IDs
# Filtered table: first row per Uniprot ID, without unknown or flagged IDs
canato2018_4RKF <- canato2018_4RK %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) %>% # Remove unknowns
  filter(!uniprot %in% bad_up) # Uniprots not in the database
# Row counts before and after filtering
n_canato2018_4RK <- nrow(canato2018_4RK)
n_canato2018_4RKF <- nrow(canato2018_4RKF)
F508del-CFTR interactome:
* All data: 828 genes.
* After name updating: 808 genes.
4RK-F508del-CFTR interactome:
* All data: 793 genes.
* After name updating: 774 genes.
Primary traffic hits & Canato2018
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | DNAJC8 | O75937 | DnaJ homolog subfamily C member 8 |
| 2 | NCBP2 | P52298 | Nuclear cap-binding protein subunit 2 |
Cells (2019) 8(4):353 Folding Status Is Determinant over
Traffic-Competence in Defining CFTR Interactors in the Endoplasmic
Reticulum
João D. Santos, Sara Canato, Ana S. Carvalho, Hugo M. Botelho, Kerman
Aloria, Margarida D. Amaral, Rune Matthiesen, Andre O. Falcao, Carlos M.
Farinha
https://doi.org/10.3390/cells8040353
Pulled-down proteins that interact with either wt- or DD/AA-CFTR in CFBE cells following a general strategy consisting of: (i) cell lysis and immunoprecipitation (IP) of CFTR and interacting proteins; (ii) identification of proteins by LC-MS/MS; and (iii) analysis of the CFTR interactome.
The authors aimed to explore the CFTR ERQC trafficking checkpoints by performing global comparisons of the interactions that regulate CFTR exit from the ER to identify protein factors involved in these processes. They used protein interaction profiling and global bioinformatics analyses and identified proteins that interact specifically with peptides harbouring the normal (wt) and 4RK AFT sequences, as well as proteins that interact with CFTR with/without abrogation of the DAD code (whose presence totally blocks CFTR exit from the ER). They also cross their datasets with the ones from Canato et al 2018.
An important outcome of this study is a wt-CFTR interactome from CFBE cells, which can be found in Dataset S1B and comprises 945 proteins. An interactome for DD/AA-CFTR is also provided.
# wt-CFTR interactome
# Get data
# Load the cached wt-CFTR interactome table when it exists; otherwise download
# and unzip the supplementary archive, read the gene symbols from sheet 2 of
# the datasets workbook, map identifiers, and cache as CSV.
if (file.exists(path_santos2019_wt)) {
  santos2019_wt <- read_csv(path_santos2019_wt)
} else {
  tmp <- tempfile()
  tmpdir <- file.path(dirname(tmp), "santos2019")
  download.file(url = src_santos2019, destfile = tmp, mode = "wb")
  unzip(tmp, exdir = tmpdir)
  # First column of the header-less sheet holds the gene symbols
  gene0 <- read.xlsx(
    file.path(tmpdir, "Supplementary materials", "Datasets_S1 to S7.xlsx"),
    sheet = 2,
    colNames = FALSE
  )[[1]]
  up <- genesymbol_to_uniprot(gene0)
  gene_na <- gene0[which(is.na(up))] # Gene symbols for which a Uniprot ID could not be found
  # Add missing uniprot ids. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `up` into a list).
  up[is.na(up)] <- extraids[["uniprot"]][match(gene_na, extraids$symbol)]
  # Re-derive every identifier from the Uniprot ID
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  santos2019_wt <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(santos2019_wt, path_santos2019_wt, row.names = FALSE)
}
# Library statistics (mapped genes)
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
santos2019_wtF <- santos2019_wt %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Row counts before and after filtering
n_santos2019_wt <- nrow(santos2019_wt)
n_santos2019_wtF <- nrow(santos2019_wtF)
# DD/AA-CFTR interactome
# Get data
# Load the cached DD/AA-CFTR interactome table when it exists; otherwise
# download and unzip the supplementary archive, read the gene symbols from
# sheet 1 of the datasets workbook, map identifiers, and cache as CSV.
if (file.exists(path_santos2019_DDAA)) {
  santos2019_DDAA <- read_csv(path_santos2019_DDAA)
} else {
  tmp <- tempfile()
  tmpdir <- file.path(dirname(tmp), "santos2019")
  download.file(url = src_santos2019, destfile = tmp, mode = "wb")
  unzip(tmp, exdir = tmpdir)
  # First column of the header-less sheet holds the gene symbols
  gene0 <- read.xlsx(
    file.path(tmpdir, "Supplementary materials", "Datasets_S1 to S7.xlsx"),
    sheet = 1,
    colNames = FALSE
  )[[1]]
  up <- genesymbol_to_uniprot(gene0)
  gene_na <- gene0[which(is.na(up))] # Gene symbols for which a Uniprot ID could not be found
  # Add missing uniprot ids. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `up` into a list).
  up[is.na(up)] <- extraids[["uniprot"]][match(gene_na, extraids$symbol)]
  # Re-derive every identifier from the Uniprot ID
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  santos2019_DDAA <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(santos2019_DDAA, path_santos2019_DDAA, row.names = FALSE)
}
# Library statistics (mapped genes)
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
santos2019_DDAAF <- santos2019_DDAA %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Row counts before and after filtering
n_santos2019_DDAA <- nrow(santos2019_DDAA)
n_santos2019_DDAAF <- nrow(santos2019_DDAAF)
wt-CFTR interactome:
* All data: 945 genes.
* After name updating: 936 genes.
DD/AA-CFTR interactome:
* All data: 979 genes.
* After name updating: 970 genes.
Primary traffic hits & Santos2019
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | CCT5 | P48643 | T-complex protein 1 subunit epsilon |
| 2 | DDX5 | P17844 | Probable ATP-dependent RNA helicase DDX5 |
| 3 | MYH14 | Q7Z406 | Myosin-14 |
| 4 | PABPC3 | Q9H361 | Polyadenylate-binding protein 3 |
| 5 | PSMA6 | P60900 | Proteasome subunit alpha type-6 |
J Mol Biol (2018) 430: 2951-2973
A Proteomic Variant Approach (ProVarA) for Personalized Medicine
of Inherited and Somatic Disease
Darren M. Hutt, Salvatore Loguercio, Alexandre Rosa Campos and William
E. Balch https://doi.org/10.1016/j.jmb.2018.06.017
A cohort of proteins that are differentially interacting with a broad spectrum of CFTR variants (including F508del) in CFBE cells, identified through a co-purifying protein identification technology (CoPIT) methodology [23], an immunoprecipitation-based proteomic approach: ProVarA.
The authors create a proteomic methodology called proteomic variant approach (ProVarA), which uses the CoPIT methodology in order to capture critical protein interaction profiles (PIPs) to understand the onset and progression of variant-specific disease. They apply this method to 1) interrogate the impact of different CF-causing mutations on the functional interactions of CFTR, as well as 2) assess the impact of therapeutics on the profiles of the disease-causing variants. Besides mapping the PIPs, the authors are also able to pinpoint the binding affinities of the proteins within the PIPs, which is one of the greatest strengths of this methodology. This work was performed on CFBE41o- transiently transfected with CFTR variants.
An important outcome of this study are the PIPs for WT- and F508del-CFTR in Table S1 and S2 respectively. Other outputs include PIPs for other CFTR variants (i.e. G85E, R560T, N1303K and G551D) as well as G551D-CFTR treated with VX-770 and F508del-CFTR treated with VX-809. Importantly, the PIPs for each condition were selected if they exhibited a statistically significant difference in recovery relative to the control CFTR immunoprecipitation in GFP transduced CFBE41o- null cells.
# wt-CFTR interactome
# Get data
# Load the cached wt-CFTR interactome table when it exists; otherwise download
# the spreadsheet, keep rows whose 4th column is numeric, split the `Proteins`
# entries into individual Uniprot IDs, map identifiers, and cache as CSV.
if (file.exists(path_hutt2018_wt)) {
  hutt2018_wt <- read_csv(path_hutt2018_wt)
} else {
  # Explicit .xlsx extension, consistent with the other spreadsheet downloads
  tmp <- tempfile(fileext = ".xlsx")
  download.file(url = src_hutt2018_wt, destfile = tmp, mode = "wb")
  # Rows whose 4th column cannot be read as a number are discarded
  df <- tibble(read.xlsx(tmp, sheet = 2)) %>%
    mutate(intensity = as.numeric(.[[4]])) %>%
    filter(!is.na(intensity))
  up0 <- df$Proteins
  up <- unlist(strsplit(up0, ";")) # One entry per ";"-separated Uniprot ID
  gene <- uniprot_to_genesymbol(up)
  up_na <- up[which(is.na(gene))] # Uniprot IDs for which a gene symbol could not be found
  # Add missing gene symbols. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `gene` into a list).
  gene[is.na(gene)] <- extraids[["symbol"]][match(up_na, extraids$uniprot)]
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  hutt2018_wt <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(hutt2018_wt, path_hutt2018_wt, row.names = FALSE)
}
# Library statistics (mapped genes)
# Uniprot IDs of this dataset that `extraids` flags as not in the database
bad_up <- extraids %>%
  filter(!is.na(uniprot) & !in_database & uniprot %in% hutt2018_wt$uniprot) %>%
  pull(uniprot) # Deprecated Uniprot IDs
# Filtered table: first row per Uniprot ID, without unknown or flagged IDs
hutt2018_wtF <- hutt2018_wt %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) %>% # Remove unknowns
  filter(!uniprot %in% bad_up) # Uniprots not in the database
# Row counts before and after filtering
n_hutt2018_wt <- nrow(hutt2018_wt)
n_hutt2018_wtF <- nrow(hutt2018_wtF)
# F508del-CFTR interactome
# Get data
# Load the cached F508del-CFTR interactome table when it exists; otherwise
# download the spreadsheet, keep rows whose 4th column is numeric, split the
# `Proteins` entries into individual Uniprot IDs, map identifiers, and cache
# as CSV.
if (file.exists(path_hutt2018_df)) {
  hutt2018_df <- read_csv(path_hutt2018_df)
} else {
  # Explicit .xlsx extension, consistent with the other spreadsheet downloads
  tmp <- tempfile(fileext = ".xlsx")
  download.file(url = src_hutt2018_df, destfile = tmp, mode = "wb")
  # Rows whose 4th column cannot be read as a number are discarded
  df <- tibble(read.xlsx(tmp, sheet = 2)) %>%
    mutate(intensity = as.numeric(.[[4]])) %>%
    filter(!is.na(intensity))
  up0 <- df$Proteins
  up <- unlist(strsplit(up0, ";")) # One entry per ";"-separated Uniprot ID
  gene <- uniprot_to_genesymbol(up)
  up_na <- up[which(is.na(gene))] # Uniprot IDs for which a gene symbol could not be found
  # Add missing gene symbols. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `gene` into a list).
  gene[is.na(gene)] <- extraids[["symbol"]][match(up_na, extraids$uniprot)]
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  hutt2018_df <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(hutt2018_df, path_hutt2018_df, row.names = FALSE)
}
# Library statistics (mapped genes)
# Uniprot IDs of this dataset that `extraids` flags as not in the database
bad_up <- extraids %>%
  filter(!is.na(uniprot) & !in_database & uniprot %in% hutt2018_df$uniprot) %>%
  pull(uniprot) # Deprecated Uniprot IDs
# Filtered table: first row per Uniprot ID, without unknown or flagged IDs
hutt2018_dfF <- hutt2018_df %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) %>% # Remove unknowns
  filter(!uniprot %in% bad_up) # Uniprots not in the database
# Row counts before and after filtering
n_hutt2018_df <- nrow(hutt2018_df)
n_hutt2018_dfF <- nrow(hutt2018_dfF)
wt-CFTR interactome:
* All data: 731 genes.
* After name updating: 656 genes.
F508del-CFTR interactome:
* All data: 621 genes.
* After name updating: 558 genes.
Primary traffic hits & Hutt2018
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | CCT5 | P48643 | T-complex protein 1 subunit epsilon |
| 2 | DDX5 | P17844 | Probable ATP-dependent RNA helicase DDX5 |
| 3 | MYH14 | Q7Z406 | Myosin-14 |
| 4 | PABPC3 | Q9H361 | Polyadenylate-binding protein 3 |
| 5 | PSMA6 | P60900 | Proteasome subunit alpha type-6 |
J Proteome Res (2014) 13(11): 4668-4675
Quantitative Proteomic Profiling Reveals Differentially
Regulated Proteins in Cystic Fibrosis Cells
Navin Rauniyar, Vijay Gupta, William E. Balch, and John R. Yates,
III
https://doi.org/10.1021/pr500370g
Differential protein expression between a cell line model of cystic fibrosis (i.e., bronchial epithelial cells expressing F508del-CFTR (CFBE cells)) and wild-type CFTR (HBE cells).
The authors use LC-MS/MS and the MudPIT method to investigate the differential protein expression between a cell line model of cystic fibrosis (i.e., bronchial epithelial cells expressing F508del-CFTR, CFBE cells) and wild-type CFTR (HBE cells). This is a global proteomics approach to understand how mutant CFTR may lead to perturbed molecular pathways.
The most important outcome of this study is the raw data presented in Table S1, which contains the list of identified proteins from the HBE (sheet 1) and CFBE (sheet 2), as well as a common proteins list (sheet 3). Although the authors include all the data they obtained, they advise (correctly) to only consider the proteins that were present in all three biological replicates, which is indicated by the column “Occurrence” (occurrence = 3). These are 3586 proteins for the HBE cells, and 3566 for the CFBE cells. Uniprot ID information is also present in this table.
To be done.
Cell (2013) 154(6):1390-400
High-content siRNA screen reveals global ENaC regulators and
potential cystic fibrosis therapy targets
Joana Almaça, Diana Faria, Marisa Sousa, Inna Uliyakina, Christian
Conrad, Lalida Sirianant, Luka A Clarke, José Paulo Martins, Miguel
Santos, Jean-Karim Heriché, Wolfgang Huber, Rainer Schreiber, Rainer
Pepperkok, Karl Kunzelmann, Margarida D Amaral
https://doi.org/10.1016/j.cell.2013.08.045
# Validated ENaC hits
# Get data
# Load the cached ENaC hits table when it exists; otherwise download the
# spreadsheet, map the Ensembl IDs of sheet 2 to Uniprot IDs, derive the other
# identifiers, and cache as CSV.
if (file.exists(path_almaca2013)) {
  almaca2013 <- read_csv(path_almaca2013)
} else {
  # Explicit .xlsx extension, consistent with the other spreadsheet downloads
  tmp <- tempfile(fileext = ".xlsx")
  download.file(url = src_almaca2013, destfile = tmp, mode = "wb")
  # Data start at row 5 of the sheet; rows without an Ensembl ID are dropped
  df <- tibble(read.xlsx(tmp, sheet = 2, startRow = 5)) %>%
    filter(!is.na(Ensemble.ID))
  ensg0 <- df$Ensemble.ID
  up <- ensembl_to_uniprot(ensg0)
  ensg_na <- ensg0[which(is.na(up))] # Ensembl IDs for which a Uniprot ID could not be found
  # Add missing uniprot IDs. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `up` into a list).
  up[is.na(up)] <- extraids[["uniprot"]][match(ensg_na, extraids$ensembl)]
  # Re-derive every identifier from the Uniprot ID
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  almaca2013 <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname
  )
  write.csv(almaca2013, path_almaca2013, row.names = FALSE)
}
# Library statistics (mapped genes)
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
almaca2013F <- almaca2013 %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Row counts before and after filtering
n_almaca2013 <- nrow(almaca2013)
n_almaca2013F <- nrow(almaca2013F)
ENaC activating genes:
* All data: 166 genes.
* After name updating: 165 genes.
Primary traffic hits & Almaça2013
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | FRK | P42685 | Tyrosine-protein kinase FRK |
| 2 | GRK5 | P34947 | G protein-coupled receptor kinase 5 |
| 3 | ITPK1 | Q13572 | Inositol-tetrakisphosphate 1-kinase |
| 4 | PPP1R1B | Q9UD71 | Protein phosphatase 1 regulatory subunit 1B |
J Biol Chem (2018) 293(4):1203-1217
High-throughput screening identifies FAU protein as a
regulator of mutant cystic fibrosis transmembrane conductance regulator
channel
Valeria Tomati, Emanuela Pesce, Emanuela Caci, Elvira Sondo, Paolo
Scudieri, Monica Marini, Felice Amato, Giuseppe Castaldo, Roberto
Ravazzolo, Luis J.V.Galietta, Nicoletta Pedemonte
https://doi.org/10.1074/jbc.M117.816595
Genes whose silencing significantly rescued F508del-CFTR activity, as indicated by enhanced anion transport through the plasma membrane.
The authors performed the first genome-wide screening of a druggable-genome library of siRNA molecules in bronchial epithelial cells to find proteins whose suppression results in a significant functional and biochemical rescue of F508del-CFTR. They were able to find 37 validated targets, which are displayed in the paper itself in Table 1. They then dissect some of them, particularly FAU, regarding their MoA.
# Load the cached Tomati 2018 hit table when it exists; otherwise build it from
# the values transcribed below, map identifiers, and cache as CSV.
if (file.exists(path_tomati2018)) {
  tomati2018 <- read_csv(path_tomati2018)
} else {
  # Transcribe data: gene symbol, mean score and Uniprot ID, one entry per hit
  df <- tibble(
    gene = c("ALOX5AP","ANXA11","CDK6","CHD4","CNTN1","CSNK2A1","FAU","FRAG1","GALK1",
             "GJA5","GJB4","GPR149","KCTD8","LRRC59","MLLT6","NR2E3","P2RX1","PHF12",
             "PLXNA1","PLXND1","PRPS1","PTCH1","RARA","RBBP6","RNF207","RORB","SEC22B",
             "SGK223","SLC25A1","SNRNP25","TLR7","TRIM24","UBA2","UBA52","UBE2C","UBE2I","UBXN6"),
    mean = c(1.302,1.307,1.342,1.787,1.309,1.395,1.621,1.382,1.304,1.346,1.293,1.445,1.4,1.37,
             1.501,1.419,1.291,1.49,1.484,1.295,1.322,1.425,1.31,1.472,1.449,1.328,1.365,1.411,
             1.303,1.519,1.376,1.739,1.653,1.922,1.357,1.639,1.366),
    uniprot = c("P20292", "P50995", "Q00534", "Q14839", "Q12860", "P68400", "P62861/P35544",
                "Q9UHJ9", "P51570", "P36382", "Q9NTQ9", "Q86SP6", "Q6ZWB6", "Q96AG4",
                "P55198", "Q9Y5X4", "P51575", "Q96QT6", "Q9UIW2", "Q9Y4D7", "P60891",
                "Q13635", "P10276", "Q7Z6E9", "Q6ZRF8", "Q92753", "O75396", "Q86YV5",
                "P53007", "Q9BV90", "Q9NYK1", "O15164", "Q9UBT2", "P62987", "O00762",
                "P63279", "Q9BZV1")
  )
  # Entries of the form "A/B" keep only the first Uniprot ID
  up <- sub("(.*)/(.*)", "\\1", df$uniprot)
  gene <- uniprot_to_genesymbol(up)
  up_na <- up[which(is.na(gene))] # Uniprot IDs for which a gene symbol could not be found
  # Add missing gene symbols. The column is extracted with `[[` so the
  # replacement is a plain vector whether `extraids` is a data.frame or a
  # tibble (`tbl[rows, "col"]` stays a tibble and would turn `gene` into a list).
  gene[is.na(gene)] <- extraids[["symbol"]][match(up_na, extraids$uniprot)]
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  # Spelled-out TRUE: `T` is an ordinary variable that can be reassigned
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  tomati2018 <- tibble(
    symbol = gene,
    uniprot = up,
    ensembl = ensg,
    entrez = id,
    name = gname,
    score = df$mean
  )
  write.csv(tomati2018, path_tomati2018, row.names = FALSE)
}
# Library statistics (mapped genes)
# Filtered table: first row per Uniprot ID, rows without a Uniprot ID removed
tomati2018F <- tomati2018 %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
# Row counts before and after filtering
n_tomati2018 <- nrow(tomati2018)
n_tomati2018F <- nrow(tomati2018F)
Primary traffic hits & Tomati2018
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | UBE2I | P63279 | SUMO-conjugating enzyme UBC9 |
Nature Cell Biology (2012) 14, 764–774
Genome-wide RNAi screening identifies human proteins with a
regulatory function in the early secretory pathway
Jeremy C. Simpson, Brigitte Joggerst, Vibor Laketa, Fatima Verissimo,
Cihan Cetin, Holger Erfle, Mariana G. Bexiga, Vasanth R. Singan,
Jean-Karim Hériché, Beate Neumann, Alvaro Mateos, Jonathon Blake,
Stephanie Bechtel, Vladimir Benes, Stefan Wiemann, Jan Ellenberg &
Rainer Pepperkok
https://doi.org/10.1038/ncb2510
Proteins influencing secretion of tsO45G in HeLa cells.
# Simpson et al. (2012): load the cached table when it exists; otherwise
# download the source spreadsheet, map its identifiers and cache the result.
if (file.exists(path_simpson2012)) {
  simpson2012 <- read_csv(path_simpson2012)
} else {
  tmp <- tempfile()
  download.file(url = src_simpson2012, destfile = tmp, mode = "wb")
  df <- tibble(read_excel(tmp))
  ensg0 <- df[["ENSEMBL ID"]]
  up <- ensembl_to_uniprot(ensg0)
  # Fill in UniProt IDs the converter could not resolve from the manual
  # `extraids` table. Indexing the column vector (instead of
  # `extraids[rows, "uniprot"]`) always yields a plain vector, even when
  # `extraids` is a tibble, so `up` is never coerced to a list.
  ensg_na <- ensg0[which(is.na(up))]
  up[is.na(up)] <- extraids$uniprot[match(ensg_na, extraids$ensembl)]
  gene <- uniprot_to_genesymbol(up) # Get gene symbols
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  simpson2012 <- tibble(symbol = gene,
                        uniprot = up,
                        ensembl = ensg,
                        entrez = id,
                        name = gname)
  write.csv(simpson2012, path_simpson2012, row.names = FALSE)
}
# Library statistics (mapped genes)
simpson2012F <- simpson2012 %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_simpson2012 <- nrow(simpson2012)   # all rows
n_simpson2012F <- nrow(simpson2012F) # unique, non-missing uniprot IDs
Traffic regulating genes:
* All data: 554 genes.
* After name updating: 554 genes.
Primary traffic hits & Simpson2012
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | CNKSR1 | Q969H4 | Connector enhancer of kinase suppressor of ras 1 |
| 2 | CNR2 | P34972 | Cannabinoid receptor 2 |
| 3 | DGUOK | Q16854 | Deoxyguanosine kinase, mitochondrial |
| 4 | EMILIN2 | Q9BXX0 | EMILIN-2 |
| 5 | FER | P16591 | Tyrosine-protein kinase Fer |
| 6 | GOSR2 | O14653 | Golgi SNAP receptor complex member 2 |
| 7 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
| 8 | ZBTB25 | P24278 | Zinc finger and BTB domain-containing protein 25 |
Confirmed traffic hits & Simpson2012
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
Cell (2006) 127(4):803-815
Hsp90 Cochaperone Aha1 Downregulation Rescues Misfolding of CFTR
in Cystic Fibrosis
Xiaodong Wang, John Venable, Paul LaPointe, Darren M. Hutt, Atanas V.
Koulov, Judith Coppinger, Cemal Gurkan, Wendy Kellner, Jeanne Matteson,
Helen Plutner, John R. Riordan, Jeffery W. Kelly, John R. Yates III,
William E. Balch
https://doi.org/10.1016/j.cell.2006.09.043
CFTR-specific proteomes immunoprecipitated from matched BHK cell lines heterologously expressing wt- or F508del-CFTR.
The authors apply mass spectrometry through the use of multidimensional protein identification technology (MudPIT) to begin to define the global protein interactions of CFTR – the CFTR interactome. The work is mostly focused on the ER folding and export proteome, but other wt-CFTR interactors were also found, and some are even discussed in the supplemental discussion.
Experiments were conducted on 4 different cell lines: BHK (kidney, hamster), Calu-3 (lung, human), HT29 (intestine, human), and T84 (intestine, human). Interestingly all the human cell lines are cancer cell lines. Might make sense to consider the full interactome (167 proteins), Table S1, or simply the Calu-3 interactome, Table S5 Protein (50 proteins). Experiments were also conducted on BHK F508del-CFTR cells, but these should probably not be considered.
# wt-CFTR Interactome in BHK cells
# Wang et al. (2006), wt-CFTR: load the cached table when it exists;
# otherwise download the workbook, read its 3rd sheet, map the identifiers
# and cache the result.
if (file.exists(path_wang2006_wt)) {
  wang2006_wt <- read_csv(path_wang2006_wt)
} else {
  tmp <- tempfile()
  download.file(url = src_wang2006, destfile = tmp, mode = "wb")
  df <- tibble(read_excel(tmp, sheet = 3))
  id0 <- df[[1]] # first column is passed on as gene IDs
  up <- geneid_to_uniprot(id0)
  # Gene IDs for which a uniprot ID could not be found
  id_na <- id0[which(is.na(up))]
  # Add missing uniprot IDs from the manual `extraids` table. Indexing the
  # column vector (instead of `extraids[rows, "uniprot"]`) always yields a
  # plain vector, even when `extraids` is a tibble.
  up[is.na(up)] <- extraids$uniprot[match(id_na, extraids$entrez)]
  gene <- uniprot_to_genesymbol(up) # Get gene symbols
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  wang2006_wt <- tibble(symbol = gene,
                        uniprot = up,
                        ensembl = ensg,
                        entrez = id,
                        name = gname)
  write.csv(wang2006_wt, path_wang2006_wt, row.names = FALSE)
}
# Library statistics (mapped genes)
wang2006_wtF <- wang2006_wt %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_wang2006_wt <- nrow(wang2006_wt)   # all rows
n_wang2006_wtF <- nrow(wang2006_wtF) # unique, non-missing uniprot IDs
# F508del-CFTR Interactome in BHK cells
# Wang et al. (2006), F508del-CFTR: load the cached table when it exists;
# otherwise download the workbook, read its 5th sheet, map the identifiers
# and cache the result.
if (file.exists(path_wang2006_df)) {
  wang2006_df <- read_csv(path_wang2006_df)
} else {
  tmp <- tempfile()
  download.file(url = src_wang2006, destfile = tmp, mode = "wb")
  df <- tibble(read_excel(tmp, sheet = 5))
  id0 <- df[[1]] # first column is passed on as gene IDs
  up <- geneid_to_uniprot(id0)
  # Gene IDs for which a uniprot ID could not be found
  id_na <- id0[which(is.na(up))]
  # Add missing uniprot IDs from the manual `extraids` table. Indexing the
  # column vector (instead of `extraids[rows, "uniprot"]`) always yields a
  # plain vector, even when `extraids` is a tibble.
  up[is.na(up)] <- extraids$uniprot[match(id_na, extraids$entrez)]
  gene <- uniprot_to_genesymbol(up) # Get gene symbols
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  wang2006_df <- tibble(symbol = gene,
                        uniprot = up,
                        ensembl = ensg,
                        entrez = id,
                        name = gname)
  write.csv(wang2006_df, path_wang2006_df, row.names = FALSE)
}
# Library statistics (mapped genes)
wang2006_dfF <- wang2006_df %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_wang2006_df <- nrow(wang2006_df)   # all rows
n_wang2006_dfF <- nrow(wang2006_dfF) # unique, non-missing uniprot IDs
wt-CFTR interactome (BHK):
* All data: 76 genes.
* After name updating: 76 genes.
F508del-CFTR interactome (BHK):
* All data: 46 genes.
* After name updating: 46 genes.
Primary traffic hits & Wang2006
Sci Rep (2017) 9;7(1):7642
Targeting the PI3K/Akt/mTOR signalling pathway in Cystic
Fibrosis
R Reilly, M S Mroz, E Dempsey, K Wynne, S J Keely, E F McKone, C Hiebel,
C Behl, J A Coppinger
https://doi.org/10.1038/s41598-017-06588-z
IP-MS profiling of CFTR-containing protein complexes immunoprecipitated from CFBE41o- and HBE41o- cells expressing F508del-CFTR and wt-CFTR, respectively.
# wt-CFTR Interactome
# Reilly et al. (2017), wt-CFTR: load the cached table when it exists;
# otherwise download the spreadsheet, map the identifiers and cache the
# result.
if (file.exists(path_reilly2017_wt)) {
  reilly2017_wt <- read_csv(path_reilly2017_wt)
} else {
  tmp <- tempfile()
  download.file(url = src_reilly2017_wt, destfile = tmp, mode = "wb")
  # Skip the first row of the sheet, then derive `gene` by dropping
  # everything from the last underscore of `Accession` onward.
  df <- tibble(read_excel(tmp, skip = 1)) %>%
    mutate(gene = sub("(.*)_.*$", "\\1", Accession))
  gene0 <- df$gene
  up <- genesymbol_to_uniprot(gene0)
  # Fill in UniProt IDs the converter could not resolve from the manual
  # `extraids` table. Indexing the column vector (instead of
  # `extraids[rows, "uniprot"]`) always yields a plain vector, even when
  # `extraids` is a tibble, so `up` is never coerced to a list.
  gene_na <- gene0[which(is.na(up))]
  up[is.na(up)] <- extraids$uniprot[match(gene_na, extraids$symbol)]
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  reilly2017_wt <- tibble(symbol = gene,
                          uniprot = up,
                          ensembl = ensg,
                          entrez = id,
                          name = gname)
  write.csv(reilly2017_wt, path_reilly2017_wt, row.names = FALSE)
}
# Library statistics (mapped genes)
reilly2017_wtF <- reilly2017_wt %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_reilly2017_wt <- nrow(reilly2017_wt)   # all rows
n_reilly2017_wtF <- nrow(reilly2017_wtF) # unique, non-missing uniprot IDs
# F508del-CFTR Interactome
# Reilly et al. (2017), F508del-CFTR: load the cached table when it exists;
# otherwise download the spreadsheet, map the identifiers and cache the
# result.
if (file.exists(path_reilly2017_df)) {
  reilly2017_df <- read_csv(path_reilly2017_df)
} else {
  tmp <- tempfile()
  download.file(url = src_reilly2017_df, destfile = tmp, mode = "wb")
  # Skip the first row of the sheet, then derive `gene` by dropping
  # everything from the last underscore of `Accession` onward.
  df <- tibble(read_excel(tmp, skip = 1)) %>%
    mutate(gene = sub("(.*)_.*$", "\\1", Accession))
  gene0 <- df$gene
  up <- genesymbol_to_uniprot(gene0)
  # Fill in UniProt IDs the converter could not resolve from the manual
  # `extraids` table. Indexing the column vector (instead of
  # `extraids[rows, "uniprot"]`) always yields a plain vector, even when
  # `extraids` is a tibble, so `up` is never coerced to a list.
  gene_na <- gene0[which(is.na(up))]
  up[is.na(up)] <- extraids$uniprot[match(gene_na, extraids$symbol)]
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  reilly2017_df <- tibble(symbol = gene,
                          uniprot = up,
                          ensembl = ensg,
                          entrez = id,
                          name = gname)
  write.csv(reilly2017_df, path_reilly2017_df, row.names = FALSE)
}
# Library statistics (mapped genes)
reilly2017_dfF <- reilly2017_df %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_reilly2017_df <- nrow(reilly2017_df)   # all rows
n_reilly2017_dfF <- nrow(reilly2017_dfF) # unique, non-missing uniprot IDs
wt-CFTR interactome:
* All data: 504 genes.
* After name updating: 491 genes.
F508del-CFTR interactome:
* All data: 506 genes.
* After name updating: 478 genes.
Primary traffic hits & Reilly2017 (wt)
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | HELLS | Q9NRZ9 | Lymphoid-specific helicase |
| 2 | MYH14 | Q7Z406 | Myosin-14 |
Primary traffic hits & Reilly2017 (F508del)
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ARHGEF2 | Q92974 | Rho guanine nucleotide exchange factor 2 |
| 2 | COL5A1 | P20908 | Collagen alpha-1(V) chain |
| 3 | LAMB1 | P07942 | Laminin subunit beta-1 |
| 4 | MGAT1 | P26572 | Alpha-1,3-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase |
| 5 | MYH14 | Q7Z406 | Myosin-14 |
| 6 | PABPC3 | Q9H361 | Polyadenylate-binding protein 3 |
Primary traffic hits & Reilly2017 (wt & F508del)
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | MYH14 | Q7Z406 | Myosin-14 |
Cell (2006) 127(6):1265-1281
Quantitative Proteomics Analysis of the Secretory
Pathway
Annalyn Gilchrist, Catherine E. Au, Johan Hiding, Alexander W. Bell,
Julia Fernandez-Rodriguez, Souad Lesimple, Hisao Nagaya, Line Roy, Sara
J.C. Gosline, Michael Hallett, Jacques Paiement, Robert E. Kearney,
Tommy Nilsson, John J.M. Bergeron
https://doi.org/10.1016/j.cell.2006.10.036
Secretory pathway proteome, with information about the intracellular localization and relative presence of each protein in the rough and smooth ER, Golgi cisternae and Golgi-derived COPI vesicles.
I did not manage to analyze these data because I could not process the gene identifiers: NCBI gi numbers.
Sci Signal (2019) 12(562):eaan7984
A posttranslational modification code for CFTR maturation is
altered in cystic fibrosis
Sandra Pankow, Casimir Bamberger, John R Yates 3rd
https://doi.org/10.1126/scisignal.aan7984
wt-CFTR interactome upon treatment with CX-4945 (also known as silmitasertib, a competitive inhibitor of CK2alpha).
# wild-type CFTR interactome upon CX-4945 treatment
# Pankow et al. (2019): load the cached table when it exists; otherwise
# download the workbook, keep the conclusive identifications, map the
# identifiers and cache the result.
if (file.exists(path_pankow2019)) {
  pankow2019 <- read_csv(path_pankow2019)
} else {
  tmp <- tempfile()
  download.file(url = src_pankow2019, destfile = tmp, mode = "wb")
  # Keep rows flagged "CONCLUSIVE" and strip any "Reverse_" text from the
  # accession column.
  df <- tibble(read.xlsx(tmp)) %>%
    filter(Protein.evidence == "CONCLUSIVE") %>%
    rename(uniprot = `ACC(s)`) %>%
    mutate(uniprot = gsub("Reverse_", "", uniprot))
  up <- df$uniprot
  gene <- uniprot_to_genesymbol(up)
  # Fill in gene symbols the converter could not resolve from the manual
  # `extraids` table. Indexing the column vector (instead of
  # `extraids[rows, "symbol"]`) always yields a plain vector, even when
  # `extraids` is a tibble, so `gene` is never coerced to a list.
  up_na <- up[which(is.na(gene))]
  gene[is.na(gene)] <- extraids$symbol[match(up_na, extraids$uniprot)]
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  pankow2019 <- tibble(symbol = gene,
                       uniprot = up,
                       ensembl = ensg,
                       entrez = id,
                       name = gname)
  write.csv(pankow2019, path_pankow2019, row.names = FALSE)
}
# Library statistics (mapped genes)
pankow2019F <- pankow2019 %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_pankow2019 <- nrow(pankow2019)   # all rows
n_pankow2019F <- nrow(pankow2019F) # unique, non-missing uniprot IDs
wt-CFTR interactome, CX-4945:
* All data: 2080 genes.
* After name updating: 2071 genes.
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ARHGEF2 | Q92974 | Rho guanine nucleotide exchange factor 2 |
| 2 | ARVCF | O00192 | Armadillo repeat protein deleted in velo-cardio-facial syndrome |
| 3 | ATP1B3 | P54709 | Sodium/potassium-transporting ATPase subunit beta-3 |
| 4 | AURKA | O14965 | Aurora kinase A |
| 5 | BAG3 | O95817 | BAG family molecular chaperone regulator 3 |
| 6 | CCT5 | P48643 | T-complex protein 1 subunit epsilon |
| 7 | CLK1 | P49759 | Dual specificity protein kinase CLK1 |
| 8 | DDX5 | P17844 | Probable ATP-dependent RNA helicase DDX5 |
| 9 | DNAJC8 | O75937 | DnaJ homolog subfamily C member 8 |
| 10 | HELLS | Q9NRZ9 | Lymphoid-specific helicase |
| 11 | LAMA3 | Q16787 | Laminin subunit alpha-3 |
| 12 | LAMB1 | P07942 | Laminin subunit beta-1 |
| 13 | MICB | Q29980 | MHC class I polypeptide-related sequence B |
| 14 | MMS19 | Q96T76 | MMS19 nucleotide excision repair protein homolog |
| 15 | MYH14 | Q7Z406 | Myosin-14 |
| 16 | NRIP1 | P48552 | Nuclear receptor-interacting protein 1 |
| 17 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
| 18 | PDE8A | O60658 | High affinity cAMP-specific and IBMX-insensitive 3’,5’-cyclic phosphodiesterase 8A |
| 19 | PSMA6 | P60900 | Proteasome subunit alpha type-6 |
| 20 | PSMB1 | P20618 | Proteasome subunit beta type-1 |
| 21 | RELA | Q04206 | Transcription factor p65 |
| 22 | SLC34A1 | Q06495 | Sodium-dependent phosphate transport protein 2A |
| 23 | VPS26A | O75436 | Vacuolar protein sorting-associated protein 26A |
PLoS One (2020) 15(11):e0239189
Mining GWAS and eQTL data for CF lung disease modifiers by gene
expression imputation
Hong Dang, Deepika Polineni, Rhonda G Pace, Jaclyn R Stonebraker,
Harriet Corvol, Garry R Cutting, Mitchell L Drumm, Lisa J Strug, Wanda K
O’Neal, Michael R Knowles
https://doi.org/10.1371/journal.pone.0239189
Candidate modifier genes of CF lung disease. Obtained from an integrated analysis of expression data from CF cohorts and Genotype-Tissue Expression (GTEx) reference data sets from multiple human tissues to generate predictive models, which were used to impute transcriptional regulation from genetic variance in a GWAS population.
# lung disease modifier genes
# Dang et al. (2020): load the cached table when it exists; otherwise
# download the workbook, map its identifiers and cache the result.
if (file.exists(path_dang2020)) {
  dang2020 <- read_csv(path_dang2020)
} else {
  tmp <- tempfile()
  download.file(url = src_dang2020, destfile = tmp, mode = "wb")
  df <- tibble(read.xlsx(tmp))
  ensg0 <- df$ENSGID
  up <- ensembl_to_uniprot(ensg0)
  # Fill in UniProt IDs the converter could not resolve from the manual
  # `extraids` table. Indexing the column vector (instead of
  # `extraids[rows, "uniprot"]`) always yields a plain vector, even when
  # `extraids` is a tibble, so `up` is never coerced to a list.
  ensg_na <- ensg0[which(is.na(up))]
  up[is.na(up)] <- extraids$uniprot[match(ensg_na, extraids$ensembl)]
  gene <- uniprot_to_genesymbol(up) # Get gene symbols
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  dang2020 <- tibble(symbol = gene,
                     uniprot = up,
                     ensembl = ensg,
                     entrez = id,
                     name = gname)
  write.csv(dang2020, path_dang2020, row.names = FALSE)
}
# Library statistics (mapped genes)
dang2020F <- dang2020 %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_dang2020 <- nrow(dang2020)   # all rows
n_dang2020F <- nrow(dang2020F) # unique, non-missing uniprot IDs
Primary traffic hits & Dang2020
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ARVCF | O00192 | Armadillo repeat protein deleted in velo-cardio-facial syndrome |
| 2 | ATXN1 | P54253 | Ataxin-1 |
| 3 | DNAJB7 | Q7Z6W7 | DnaJ homolog subfamily B member 7 |
| 4 | FRK | P42685 | Tyrosine-protein kinase FRK |
| 5 | NRIP1 | P48552 | Nuclear receptor-interacting protein 1 |
| 6 | TIMM10 | P62072 | Mitochondrial import inner membrane translocase subunit Tim10 |
Confirmed traffic hits & Dang2020
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ARVCF | O00192 | Armadillo repeat protein deleted in velo-cardio-facial syndrome |
Sci Rep (2020) 10(1):20553
Integrative genomic meta‑analysis reveals novel molecular
insights into cystic fibrosis and deltaF508‑CFTR rescue
Rachel A Hodos, Matthew D Strub, Shyam Ramachandran, Li Li, Paul B
McCray Jr, Joel T Dudley
http://dx.doi.org/10.1038/s41598-020-76347-0
Meta-analysis of publicly available datasets. CFTR Gene Set Library comprising 60 gene sets with various associations to CF or CFTR.
This work is the largest CF transcriptomic meta-analysis to date, having the advantage of being a quantitative meta-analysis, combining fold-change estimates across studies.
A major output of this work is the CFTR Gene Set Library compiled from 60 gene sets from 34 publications, which should be of great interest to future genomic studies of CF and CFTR.
# Differentially expressed genes
# Hodos et al. (2020), differentially expressed genes: load the cached table
# when it exists; otherwise download the workbook, stack all of its sheets,
# map the identifiers and cache the result.
if (file.exists(path_hodos2020_deg)) {
  hodos2020_deg <- read_csv(path_hodos2020_deg)
} else {
  tmp <- tempfile()
  download.file(url = src_hodos2020_deg, destfile = tmp, mode = "wb")
  names_sh <- getSheetNames(tmp)
  # Read every sheet and tag its rows with the sheet name. seq_along() is
  # used instead of 1:length() so that an empty sheet list yields no
  # iterations rather than the indices 1 and 0.
  df <- lapply(seq_along(names_sh), function(x) {
    temp <- read.xlsx(tmp, sheet = x)
    temp$signature <- names_sh[x]
    temp
  })
  df <- tibble(do.call(rbind, df))
  id0 <- df$GeneID
  up <- geneid_to_uniprot(id0)
  # Fill in UniProt IDs the converter could not resolve from the manual
  # `extraids` table. Indexing the column vector (instead of
  # `extraids[rows, "uniprot"]`) always yields a plain vector, even when
  # `extraids` is a tibble, so `up` is never coerced to a list.
  id_na <- id0[which(is.na(up))]
  up[is.na(up)] <- extraids$uniprot[match(id_na, extraids$entrez)]
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  hodos2020_deg <- tibble(symbol = gene,
                          uniprot = up,
                          ensembl = ensg,
                          entrez = id,
                          name = gname,
                          signature = df$signature)
  write.csv(hodos2020_deg, path_hodos2020_deg, row.names = FALSE)
}
# Library statistics (mapped genes)
hodos2020_degF <- hodos2020_deg %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_hodos2020_deg <- nrow(hodos2020_deg)   # all rows
n_hodos2020_degF <- nrow(hodos2020_degF) # unique, non-missing uniprot IDs
# Core Gene Signatures
# Hodos et al. (2020), core gene signatures: load the cached table when it
# exists; otherwise download the workbook, stack the Symbol/GeneID columns
# of all sheets, map the identifiers and cache the result.
if (file.exists(path_hodos2020_cosig)) {
  hodos2020_cosig <- read_csv(path_hodos2020_cosig)
} else {
  tmp <- tempfile()
  download.file(url = src_hodos2020_cosig, destfile = tmp, mode = "wb")
  names_sh <- getSheetNames(tmp)
  # Read every sheet, keep only the two identifier columns and tag the rows
  # with the sheet name. seq_along() is used instead of 1:length() so that
  # an empty sheet list yields no iterations rather than the indices 1 and 0.
  df <- lapply(seq_along(names_sh), function(x) {
    temp <- read.xlsx(tmp, sheet = x)
    temp <- temp[, c("Symbol", "GeneID")]
    temp$signature <- names_sh[x]
    temp
  })
  df <- tibble(do.call(rbind, df))
  id0 <- df$GeneID
  up <- geneid_to_uniprot(id0)
  # Fill in UniProt IDs the converter could not resolve from the manual
  # `extraids` table. Indexing the column vector (instead of
  # `extraids[rows, "uniprot"]`) always yields a plain vector, even when
  # `extraids` is a tibble, so `up` is never coerced to a list.
  id_na <- id0[which(is.na(up))]
  up[is.na(up)] <- extraids$uniprot[match(id_na, extraids$entrez)]
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  hodos2020_cosig <- tibble(symbol = gene,
                            uniprot = up,
                            ensembl = ensg,
                            entrez = id,
                            name = gname,
                            signature = df$signature)
  write.csv(hodos2020_cosig, path_hodos2020_cosig, row.names = FALSE)
}
# Library statistics (mapped genes)
hodos2020_cosigF <- hodos2020_cosig %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_hodos2020_cosig <- nrow(hodos2020_cosig)   # all rows
n_hodos2020_cosigF <- nrow(hodos2020_cosigF) # unique, non-missing uniprot IDs
# Gene Signatures
# Hodos et al. (2020), all gene signatures: load the cached table when it
# exists; otherwise download the spreadsheet, expand it to one row per
# (gene set, Entrez ID) pair, map the identifiers and cache the result.
if (file.exists(path_hodos2020_allsig)) {
  hodos2020_allsig <- read_csv(path_hodos2020_allsig)
} else {
  tmp <- tempfile()
  download.file(url = src_hodos2020_allsig, destfile = tmp, mode = "wb")
  # `Entrez IDs` holds a ", "-separated list per gene set; split it so that
  # each ID gets its own row.
  df <- tibble(read_excel(tmp)) %>%
    select(`Gene Set`, `Entrez IDs`) %>%
    rename(gene_set = `Gene Set`,
           entrez = `Entrez IDs`) %>%
    separate_rows(entrez, sep = ", ")
  id0 <- df$entrez
  up <- geneid_to_uniprot(id0)
  # Fill in UniProt IDs the converter could not resolve from the manual
  # `extraids` table. Indexing the column vector (instead of
  # `extraids[rows, "uniprot"]`) always yields a plain vector, even when
  # `extraids` is a tibble, so `up` is never coerced to a list.
  id_na <- id0[which(is.na(up))]
  up[is.na(up)] <- extraids$uniprot[match(id_na, extraids$entrez)]
  gene <- uniprot_to_genesymbol(up)
  ensg <- uniprot_to_ensembl(up)
  id <- uniprot_to_geneid(up)
  gname <- uniprot_to_genename(up, parallelize = TRUE)
  hodos2020_allsig <- tibble(symbol = gene,
                             uniprot = up,
                             ensembl = ensg,
                             entrez = id,
                             name = gname,
                             geneset = df$gene_set)
  write.csv(hodos2020_allsig, path_hodos2020_allsig, row.names = FALSE)
}
# Library statistics (mapped genes)
hodos2020_allsigF <- hodos2020_allsig %>%
  group_by(uniprot) %>%
  slice(1) %>% # Remove duplicate uniprot IDs
  ungroup() %>%
  filter(!is.na(uniprot)) # Remove unknowns
n_hodos2020_allsig <- nrow(hodos2020_allsig)   # all rows
n_hodos2020_allsigF <- nrow(hodos2020_allsigF) # unique, non-missing uniprot IDs
CF differentially expressed genes:
* All data: 14371 genes.
* After name updating and excluding non-protein coding genes: 7286
genes.
Core Gene Signatures:
* All data: 88972 genes.
* After name updating and excluding non-protein coding genes: 18712
genes.
All Gene Signatures:
* All data: 10965 genes.
* After name updating and excluding non-protein coding genes: 6459
genes.
Primary traffic hits & Hodos2020 DEG
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ABR | Q12979 | Active breakpoint cluster region-related protein |
| 2 | ACOT4 | Q8N9L9 | Peroxisomal succinyl-coenzyme A thioesterase |
| 3 | ADAMTS6 | Q9UKP5 | A disintegrin and metalloproteinase with thrombospondin motifs 6 |
| 4 | APOBEC3A | P31941 | DNA dC->dU-editing enzyme APOBEC-3A |
| 5 | ARL4D | P49703 | ADP-ribosylation factor-like protein 4D |
| 6 | ATP1B3 | P54709 | Sodium/potassium-transporting ATPase subunit beta-3 |
| 7 | ATXN1 | P54253 | Ataxin-1 |
| 8 | C3AR1 | Q16581 | C3a anaphylatoxin chemotactic receptor |
| 9 | CCR8 | P51685 | C-C chemokine receptor type 8 |
| 10 | CCT5 | P48643 | T-complex protein 1 subunit epsilon |
| 11 | CDH2 | P19022 | Cadherin-2 |
| 12 | CITED2 | Q99967 | Cbp/p300-interacting transactivator 2 |
| 13 | CLK1 | P49759 | Dual specificity protein kinase CLK1 |
| 14 | DAB2 | P98082 | Disabled homolog 2 |
| 15 | DCP1A | Q9NPI6 | mRNA-decapping enzyme 1A |
| 16 | DDX5 | P17844 | Probable ATP-dependent RNA helicase DDX5 |
| 17 | DRD4 | P21917 | D(4) dopamine receptor |
| 18 | EIF4G3 | O43432 | Eukaryotic translation initiation factor 4 gamma 3 |
| 19 | ETV4 | P43268 | ETS translocation variant 4 |
| 20 | EYA3 | Q99504 | Eyes absent homolog 3 |
| 21 | F5 | P12259 | Coagulation factor V |
| 22 | FER | P16591 | Tyrosine-protein kinase Fer |
| 23 | FGD6 | Q6ZV73 | FYVE, RhoGEF and PH domain-containing protein 6 |
| 24 | FHL3 | Q13643 | Four and a half LIM domains protein 3 |
| 25 | GALK2 | Q01415 | N-acetylgalactosamine kinase |
| 26 | GJB2 | P29033 | Gap junction beta-2 protein |
| 27 | GLP1R | P43220 | Glucagon-like peptide 1 receptor |
| 28 | GNG4 | P50150 | Guanine nucleotide-binding protein G(I)/G(S)/G(O) subunit gamma-4 |
| 29 | GPR162 | Q16538 | Probable G-protein coupled receptor 162 |
| 30 | GZF1 | Q9H116 | GDNF-inducible zinc finger protein 1 |
| 31 | HS2ST1 | Q7LGA3 | Heparan sulfate 2-O-sulfotransferase 1 |
| 32 | ITPK1 | Q13572 | Inositol-tetrakisphosphate 1-kinase |
| 33 | JAG2 | Q9Y219 | Protein jagged-2 |
| 34 | KLF15 | Q9UIH9 | Krueppel-like factor 15 |
| 35 | LAMB2 | P55268 | Laminin subunit beta-2 |
| 36 | LDLRAD3 | Q86YD5 | Low-density lipoprotein receptor class A domain-containing protein 3 |
| 37 | LMO2 | P25791 | Rhombotin-2 |
| 38 | LMO3 | Q8TAP4 | LIM domain only protein 3 |
| 39 | MAK | P20794 | Serine/threonine-protein kinase MAK |
| 40 | MC5R | P33032 | Melanocortin receptor 5 |
| 41 | MFHAS1 | Q9Y4C4 | Malignant fibrous histiocytoma-amplified sequence 1 |
| 42 | MGAT1 | P26572 | Alpha-1,3-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase |
| 43 | MICB | Q29980 | MHC class I polypeptide-related sequence B |
| 44 | MMS19 | Q96T76 | MMS19 nucleotide excision repair protein homolog |
| 45 | MYH14 | Q7Z406 | Myosin-14 |
| 46 | NCBP2 | P52298 | Nuclear cap-binding protein subunit 2 |
| 47 | NDUFB2 | O95178 | NADH dehydrogenase [ubiquinone] 1 beta subcomplex subunit 2, mitochondrial |
| 48 | NFIL3 | Q16649 | Nuclear factor interleukin-3-regulated protein |
| 49 | NRDC | O43847 | Nardilysin |
| 50 | NRIP1 | P48552 | Nuclear receptor-interacting protein 1 |
| 51 | NTNG2 | Q96CW9 | Netrin-G2 |
| 52 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
| 53 | PAFAH2 | Q99487 | Platelet-activating factor acetylhydrolase 2, cytoplasmic |
| 54 | PARG | Q86W56 | Poly(ADP-ribose) glycohydrolase |
| 55 | PCDH17 | O14917 | Protocadherin-17 |
| 56 | PCDH19 | Q8TAB3 | Protocadherin-19 |
| 57 | PDE8A | O60658 | High affinity cAMP-specific and IBMX-insensitive 3’,5’-cyclic phosphodiesterase 8A |
| 58 | PKMYT1 | Q99640 | Membrane-associated tyrosine- and threonine-specific cdc2-inhibitory kinase |
| 59 | PLK2 | Q9NYY3 | Serine/threonine-protein kinase PLK2 |
| 60 | PPP1R3D | O95685 | Protein phosphatase 1 regulatory subunit 3D |
| 61 | PRICKLE1 | Q96MT3 | Prickle-like protein 1 |
| 62 | PRSS12 | P56730 | Neurotrypsin |
| 63 | PTER | Q96BW5 | Phosphotriesterase-related protein |
| 64 | PTGER4 | P35408 | Prostaglandin E2 receptor EP4 subtype |
| 65 | PTOV1 | Q86YD1 | Prostate tumor-overexpressed gene 1 protein |
| 66 | PTPRJ | Q12913 | Receptor-type tyrosine-protein phosphatase eta |
| 67 | RAB4B | P61018 | Ras-related protein Rab-4B |
| 68 | RB1 | P06400 | Retinoblastoma-associated protein |
| 69 | RNASE3 | P12724 | Eosinophil cationic protein |
| 70 | SCPEP1 | Q9HB40 | Retinoid-inducible serine carboxypeptidase |
| 71 | SEMA6B | Q9H3T3 | Semaphorin-6B |
| 72 | SGPP1 | Q9BX95 | Sphingosine-1-phosphate phosphatase 1 |
| 73 | SIPA1 | Q96FS4 | Signal-induced proliferation-associated protein 1 |
| 74 | SIX4 | Q9UIU6 | Homeobox protein SIX4 |
| 75 | SLC25A37 | Q9NYZ2 | Mitoferrin-1 |
| 76 | SLC2A13 | Q96QE2 | Proton myo-inositol cotransporter |
| 77 | SLC30A1 | Q9Y6M5 | Zinc transporter 1 |
| 78 | SREBF1 | P36956 | Sterol regulatory element-binding protein 1 |
| 79 | STYK1 | Q6J9G0 | Tyrosine-protein kinase STYK1 |
| 80 | SYNJ2 | O15056 | Synaptojanin-2 |
| 81 | TEKT2 | Q9UIF3 | Tektin-2 |
| 82 | TRUB1 | Q8WWH5 | Probable tRNA pseudouridine synthase 1 |
| 83 | TXNDC9 | O14530 | Thioredoxin domain-containing protein 9 |
| 84 | UQCRC1 | P31930 | Cytochrome b-c1 complex subunit 1, mitochondrial |
| 85 | VWF | P04275 | von Willebrand factor |
| 86 | ZNF16 | P17020 | Zinc finger protein 16 |
| 87 | ZNF438 | Q7Z4V0 | Zinc finger protein 438 |
| 88 | ZNF616 | Q08AN1 | Zinc finger protein 616 |
| 89 | ZNF662 | Q6ZS27 | Zinc finger protein 662 |
| 90 | ZNF692 | Q9BU19 | Zinc finger protein 692 |
Confirmed traffic hits & Hodos2020 DEG
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ADAMTS6 | Q9UKP5 | A disintegrin and metalloproteinase with thrombospondin motifs 6 |
| 2 | CITED2 | Q99967 | Cbp/p300-interacting transactivator 2 |
| 3 | GJB2 | P29033 | Gap junction beta-2 protein |
| 4 | LDLRAD3 | Q86YD5 | Low-density lipoprotein receptor class A domain-containing protein 3 |
| 5 | NTNG2 | Q96CW9 | Netrin-G2 |
| 6 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
| 7 | SLC30A1 | Q9Y6M5 | Zinc transporter 1 |
| 8 | STYK1 | Q6J9G0 | Tyrosine-protein kinase STYK1 |
Primary traffic hits & Hodos2020 core gene signatures
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ABCB4 | P21439 | Phosphatidylcholine translocator ABCB4 |
| 2 | ABR | Q12979 | Active breakpoint cluster region-related protein |
| 3 | ACOT4 | Q8N9L9 | Peroxisomal succinyl-coenzyme A thioesterase |
| 4 | ACSBG2 | Q5FVE4 | Long-chain-fatty-acid–CoA ligase ACSBG2 |
| 5 | ACTR8 | Q9H981 | Actin-related protein 8 |
| 6 | ADAMTS6 | Q9UKP5 | A disintegrin and metalloproteinase with thrombospondin motifs 6 |
| 7 | ADCY10 | Q96PN6 | Adenylate cyclase type 10 |
| 8 | ADORA1 | P30542 | Adenosine receptor A1 |
| 9 | AKT2 | P31751 | RAC-beta serine/threonine-protein kinase |
| 10 | APOA5 | Q6Q788 | Apolipoprotein A-V |
| 11 | APOB | P04114 | Apolipoprotein B-100 |
| 12 | APOBEC1 | P41238 | C->U-editing enzyme APOBEC-1 |
| 13 | APOBEC3A | P31941 | DNA dC->dU-editing enzyme APOBEC-3A |
| 14 | ARHGEF2 | Q92974 | Rho guanine nucleotide exchange factor 2 |
| 15 | ARL4D | P49703 | ADP-ribosylation factor-like protein 4D |
| 16 | ARVCF | O00192 | Armadillo repeat protein deleted in velo-cardio-facial syndrome |
| 17 | ASMT | P46597 | Acetylserotonin O-methyltransferase |
| 18 | ATP1B3 | P54709 | Sodium/potassium-transporting ATPase subunit beta-3 |
| 19 | ATXN1 | P54253 | Ataxin-1 |
| 20 | AURKA | O14965 | Aurora kinase A |
| 21 | BAG3 | O95817 | BAG family molecular chaperone regulator 3 |
| 22 | BANK1 | Q8NDB2 | B-cell scaffold protein with ankyrin repeats |
| 23 | BCL6B | Q8N143 | B-cell CLL/lymphoma 6 member B protein |
| 24 | BLK | P51451 | Tyrosine-protein kinase Blk |
| 25 | C3AR1 | Q16581 | C3a anaphylatoxin chemotactic receptor |
| 26 | CACNA1C | Q13936 | Voltage-dependent L-type calcium channel subunit alpha-1C |
| 27 | CAPN12 | Q6ZSI9 | Calpain-12 |
| 28 | CCL17 | Q92583 | C-C motif chemokine 17 |
| 29 | CCL27 | Q9Y4X3 | C-C motif chemokine 27 |
| 30 | CCR8 | P51685 | C-C chemokine receptor type 8 |
| 31 | CCT5 | P48643 | T-complex protein 1 subunit epsilon |
| 32 | CCT8L2 | Q96SF2 | T-complex protein 1 subunit theta-like 2 |
| 33 | CDH2 | P19022 | Cadherin-2 |
| 34 | CHRM1 | P11229 | Muscarinic acetylcholine receptor M1 |
| 35 | CITED2 | Q99967 | Cbp/p300-interacting transactivator 2 |
| 36 | CLDN4 | O14493 | Claudin-4 |
| 37 | CLK1 | P49759 | Dual specificity protein kinase CLK1 |
| 38 | CNKSR1 | Q969H4 | Connector enhancer of kinase suppressor of ras 1 |
| 39 | CNR2 | P34972 | Cannabinoid receptor 2 |
| 40 | COL5A1 | P20908 | Collagen alpha-1(V) chain |
| 41 | CPZ | Q66K79 | Carboxypeptidase Z |
| 42 | CREBBP | Q92793 | CREB-binding protein |
| 43 | CRX | O43186 | Cone-rod homeobox protein |
| 44 | CUBN | O60494 | Cubilin |
| 45 | CUL5 | Q93034 | Cullin-5 |
| 46 | CYP19A1 | P11511 | Aromatase |
| 47 | CYSLTR2 | Q9NS75 | Cysteinyl leukotriene receptor 2 |
| 48 | DAB2 | P98082 | Disabled homolog 2 |
| 49 | DCP1A | Q9NPI6 | mRNA-decapping enzyme 1A |
| 50 | DCSTAMP | Q9H295 | Dendritic cell-specific transmembrane protein |
| 51 | DDR2 | Q16832 | Discoidin domain-containing receptor 2 |
| 52 | DDX5 | P17844 | Probable ATP-dependent RNA helicase DDX5 |
| 53 | DGKG | P49619 | Diacylglycerol kinase gamma |
| 54 | DGUOK | Q16854 | Deoxyguanosine kinase, mitochondrial |
| 55 | DNAJB7 | Q7Z6W7 | DnaJ homolog subfamily B member 7 |
| 56 | DNAJC8 | O75937 | DnaJ homolog subfamily C member 8 |
| 57 | DPEP2 | Q9H4A9 | Dipeptidase 2 |
| 58 | DRD4 | P21917 | D(4) dopamine receptor |
| 59 | DRD5 | P21918 | D(1B) dopamine receptor |
| 60 | DYRK3 | O43781 | Dual specificity tyrosine-phosphorylation-regulated kinase 3 |
| 61 | EIF4G3 | O43432 | Eukaryotic translation initiation factor 4 gamma 3 |
| 62 | EMILIN2 | Q9BXX0 | EMILIN-2 |
| 63 | EPN3 | Q9H201 | Epsin-3 |
| 64 | EPS15L1 | Q9UBC2 | Epidermal growth factor receptor substrate 15-like 1 |
| 65 | ETV4 | P43268 | ETS translocation variant 4 |
| 66 | EVI5L | Q96CN4 | EVI5-like protein |
| 67 | EYA3 | Q99504 | Eyes absent homolog 3 |
| 68 | EZH1 | Q92800 | Histone-lysine N-methyltransferase EZH1 |
| 69 | F5 | P12259 | Coagulation factor V |
| 70 | FER | P16591 | Tyrosine-protein kinase Fer |
| 71 | FGD6 | Q6ZV73 | FYVE, RhoGEF and PH domain-containing protein 6 |
| 72 | FGL1 | Q08830 | Fibrinogen-like protein 1 |
| 73 | FHL3 | Q13643 | Four and a half LIM domains protein 3 |
| 74 | FOLR2 | P14207 | Folate receptor beta |
| 75 | FRK | P42685 | Tyrosine-protein kinase FRK |
| 76 | FSD1 | Q9BTV5 | Fibronectin type III and SPRY domain-containing protein 1 |
| 77 | GALK2 | Q01415 | N-acetylgalactosamine kinase |
| 78 | GALNS | P34059 | N-acetylgalactosamine-6-sulfatase |
| 79 | GDPD5 | Q8WTR4 | Glycerophosphodiester phosphodiesterase domain-containing protein 5 |
| 80 | GJA8 | P48165 | Gap junction alpha-8 protein |
| 81 | GJB2 | P29033 | Gap junction beta-2 protein |
| 82 | GLP1R | P43220 | Glucagon-like peptide 1 receptor |
| 83 | GNG4 | P50150 | Guanine nucleotide-binding protein G(I)/G(S)/G(O) subunit gamma-4 |
| 84 | GOSR2 | O14653 | Golgi SNAP receptor complex member 2 |
| 85 | GPR132 | Q9UNW8 | Probable G-protein coupled receptor 132 |
| 86 | GPR162 | Q16538 | Probable G-protein coupled receptor 162 |
| 87 | GRK3 | P35626 | Beta-adrenergic receptor kinase 2 |
| 88 | GRK5 | P34947 | G protein-coupled receptor kinase 5 |
| 89 | GUCY2D | Q02846 | Retinal guanylyl cyclase 1 |
| 90 | GZF1 | Q9H116 | GDNF-inducible zinc finger protein 1 |
| 91 | HELLS | Q9NRZ9 | Lymphoid-specific helicase |
| 92 | HOXD10 | P28358 | Homeobox protein Hox-D10 |
| 93 | HS2ST1 | Q7LGA3 | Heparan sulfate 2-O-sulfotransferase 1 |
| 94 | HTR1E | P28566 | 5-hydroxytryptamine receptor 1E |
| 95 | IL24 | Q13007 | Interleukin-24 |
| 96 | ISL2 | Q96A47 | Insulin gene enhancer protein ISL-2 |
| 97 | ITGA11 | Q9UKX5 | Integrin alpha-11 |
| 98 | ITPK1 | Q13572 | Inositol-tetrakisphosphate 1-kinase |
| 99 | JAG2 | Q9Y219 | Protein jagged-2 |
| 100 | KCNIP2 | Q9NS61 | Kv channel-interacting protein 2 |
| 101 | KCNK10 | P57789 | Potassium channel subfamily K member 10 |
| 102 | KIF17 | Q9P2E2 | Kinesin-like protein KIF17 |
| 103 | KLF15 | Q9UIH9 | Krueppel-like factor 15 |
| 104 | KLK7 | P49862 | Kallikrein-7 |
| 105 | LAMA3 | Q16787 | Laminin subunit alpha-3 |
| 106 | LAMB1 | P07942 | Laminin subunit beta-1 |
| 107 | LAMB2 | P55268 | Laminin subunit beta-2 |
| 108 | LDLRAD3 | Q86YD5 | Low-density lipoprotein receptor class A domain-containing protein 3 |
| 109 | LEPROT | O15243 | Leptin receptor gene-related protein |
| 110 | LIN9 | Q5TKA1 | Protein lin-9 homolog |
| 111 | LMO2 | P25791 | Rhombotin-2 |
| 112 | LMO3 | Q8TAP4 | LIM domain only protein 3 |
| 113 | LMTK3 | Q96Q04 | Serine/threonine-protein kinase LMTK3 |
| 114 | LRP1B | Q9NZR2 | Low-density lipoprotein receptor-related protein 1B |
| 115 | LRRK1 | Q38SD2 | Leucine-rich repeat serine/threonine-protein kinase 1 |
| 116 | MAK | P20794 | Serine/threonine-protein kinase MAK |
| 117 | MC5R | P33032 | Melanocortin receptor 5 |
| 118 | MFHAS1 | Q9Y4C4 | Malignant fibrous histiocytoma-amplified sequence 1 |
| 119 | MGAT1 | P26572 | Alpha-1,3-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase |
| 120 | MICB | Q29980 | MHC class I polypeptide-related sequence B |
| 121 | MIER1 | Q8N108 | Mesoderm induction early response protein 1 |
| 122 | MMS19 | Q96T76 | MMS19 nucleotide excision repair protein homolog |
| 123 | MOB3B | Q86TA1 | MOB kinase activator 3B |
| 124 | MPST | P25325 | 3-mercaptopyruvate sulfurtransferase |
| 125 | MYH14 | Q7Z406 | Myosin-14 |
| 126 | NAALADL1 | Q9UQQ1 | Aminopeptidase NAALADL1 |
| 127 | NCBP2 | P52298 | Nuclear cap-binding protein subunit 2 |
| 128 | NDUFB2 | O95178 | NADH dehydrogenase [ubiquinone] 1 beta subcomplex subunit 2, mitochondrial |
| 129 | NFIL3 | Q16649 | Nuclear factor interleukin-3-regulated protein |
| 130 | NOVA1 | P51513 | RNA-binding protein Nova-1 |
| 131 | NR2E1 | Q9Y466 | Nuclear receptor subfamily 2 group E member 1 |
| 132 | NRDC | O43847 | Nardilysin |
| 133 | NRIP1 | P48552 | Nuclear receptor-interacting protein 1 |
| 134 | NTNG2 | Q96CW9 | Netrin-G2 |
| 135 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
| 136 | PABPC3 | Q9H361 | Polyadenylate-binding protein 3 |
| 137 | PAFAH2 | Q99487 | Platelet-activating factor acetylhydrolase 2, cytoplasmic |
| 138 | PAK6 | Q9NQU5 | Serine/threonine-protein kinase PAK 6 |
| 139 | PAPOLB | Q9NRJ5 | Poly(A) polymerase beta |
| 140 | PARD3 | Q8TEW0 | Partitioning defective 3 homolog |
| 141 | PARG | Q86W56 | Poly(ADP-ribose) glycohydrolase |
| 142 | PCDH17 | O14917 | Protocadherin-17 |
| 143 | PCDH19 | Q8TAB3 | Protocadherin-19 |
| 144 | PCDHB2 | Q9Y5E7 | Protocadherin beta-2 |
| 145 | PCDHB8 | Q9UN66 | Protocadherin beta-8 |
| 146 | PCSK1 | P29120 | Neuroendocrine convertase 1 |
| 147 | PDE8A | O60658 | High affinity cAMP-specific and IBMX-insensitive 3',5'-cyclic phosphodiesterase 8A |
| 148 | PHKG1 | Q16816 | Phosphorylase b kinase gamma catalytic chain, skeletal muscle/heart isoform |
| 149 | PIK3R5 | Q8WYR1 | Phosphoinositide 3-kinase regulatory subunit 5 |
| 150 | PKMYT1 | Q99640 | Membrane-associated tyrosine- and threonine-specific cdc2-inhibitory kinase |
| 151 | PLA2G12A | Q9BZM1 | Group XIIA secretory phospholipase A2 |
| 152 | PLCZ1 | Q86YW0 | 1-phosphatidylinositol 4,5-bisphosphate phosphodiesterase zeta-1 |
| 153 | PLK2 | Q9NYY3 | Serine/threonine-protein kinase PLK2 |
| 154 | PMP2 | P02689 | Myelin P2 protein |
| 155 | PPP1R1B | Q9UD71 | Protein phosphatase 1 regulatory subunit 1B |
| 156 | PPP1R3D | O95685 | Protein phosphatase 1 regulatory subunit 3D |
| 157 | PRICKLE1 | Q96MT3 | Prickle-like protein 1 |
| 158 | PRSS12 | P56730 | Neurotrypsin |
| 159 | PSMA6 | P60900 | Proteasome subunit alpha type-6 |
| 160 | PSMB1 | P20618 | Proteasome subunit beta type-1 |
| 161 | PTER | Q96BW5 | Phosphotriesterase-related protein |
| 162 | PTGER4 | P35408 | Prostaglandin E2 receptor EP4 subtype |
| 163 | PTOV1 | Q86YD1 | Prostate tumor-overexpressed gene 1 protein |
| 164 | PTPRJ | Q12913 | Receptor-type tyrosine-protein phosphatase eta |
| 165 | QRSL1 | Q9H0R6 | Glutamyl-tRNA(Gln) amidotransferase subunit A, mitochondrial |
| 166 | RAB4B | P61018 | Ras-related protein Rab-4B |
| 167 | RANBP9 | Q96S59 | Ran-binding protein 9 |
| 168 | RB1 | P06400 | Retinoblastoma-associated protein |
| 169 | RECQL5 | O94762 | ATP-dependent DNA helicase Q5 |
| 170 | RELA | Q04206 | Transcription factor p65 |
| 171 | RHCE | P18577 | Blood group Rh(CE) polypeptide |
| 172 | RNASE3 | P12724 | Eosinophil cationic protein |
| 173 | SCPEP1 | Q9HB40 | Retinoid-inducible serine carboxypeptidase |
| 174 | SELE | P16581 | E-selectin |
| 175 | SEMA6B | Q9H3T3 | Semaphorin-6B |
| 176 | SGPP1 | Q9BX95 | Sphingosine-1-phosphate phosphatase 1 |
| 177 | SIPA1 | Q96FS4 | Signal-induced proliferation-associated protein 1 |
| 178 | SIX4 | Q9UIU6 | Homeobox protein SIX4 |
| 179 | SLC13A1 | Q9BZW2 | Solute carrier family 13 member 1 |
| 180 | SLC14A1 | Q13336 | Urea transporter 1 |
| 181 | SLC17A1 | Q14916 | Sodium-dependent phosphate transport protein 1 |
| 182 | SLC25A37 | Q9NYZ2 | Mitoferrin-1 |
| 183 | SLC2A13 | Q96QE2 | Proton myo-inositol cotransporter |
| 184 | SLC30A1 | Q9Y6M5 | Zinc transporter 1 |
| 185 | SLC34A1 | Q06495 | Sodium-dependent phosphate transport protein 2A |
| 186 | SLC37A4 | O43826 | Glucose-6-phosphate exchanger SLC37A4 |
| 187 | SLC39A1 | Q9NY26 | Zinc transporter ZIP1 |
| 188 | SLC52A2 | Q9HAB3 | Solute carrier family 52, riboflavin transporter, member 2 |
| 189 | SLC5A6 | Q9Y289 | Sodium-dependent multivitamin transporter |
| 190 | SLC6A15 | Q9H2J7 | Sodium-dependent neutral amino acid transporter B(0)AT2 |
| 191 | SLC9A4 | Q6AI14 | Sodium/hydrogen exchanger 4 |
| 192 | SNTA1 | Q13424 | Alpha-1-syntrophin |
| 193 | SNX6 | Q9UNH7 | Sorting nexin-6 |
| 194 | SREBF1 | P36956 | Sterol regulatory element-binding protein 1 |
| 195 | STK32C | Q86UX6 | Serine/threonine-protein kinase 32C |
| 196 | STYK1 | Q6J9G0 | Tyrosine-protein kinase STYK1 |
| 197 | SYNJ2 | O15056 | Synaptojanin-2 |
| 198 | TEKT2 | Q9UIF3 | Tektin-2 |
| 199 | TIMM10 | P62072 | Mitochondrial import inner membrane translocase subunit Tim10 |
| 200 | TPCN1 | Q9ULQ1 | Two pore calcium channel protein 1 |
| 201 | TPK1 | Q9H3S4 | Thiamin pyrophosphokinase 1 |
| 202 | TRIM68 | Q6AZZ1 | E3 ubiquitin-protein ligase TRIM68 |
| 203 | TRUB1 | Q8WWH5 | Probable tRNA pseudouridine synthase 1 |
| 204 | TSC22D2 | O75157 | TSC22 domain family protein 2 |
| 205 | TXNDC9 | O14530 | Thioredoxin domain-containing protein 9 |
| 206 | UBE2I | P63279 | SUMO-conjugating enzyme UBC9 |
| 207 | UNC5B | Q8IZJ1 | Netrin receptor UNC5B |
| 208 | UQCRC1 | P31930 | Cytochrome b-c1 complex subunit 1, mitochondrial |
| 209 | UQCRFS1 | P47985 | Cytochrome b-c1 complex subunit Rieske, mitochondrial |
| 210 | UROD | P06132 | Uroporphyrinogen decarboxylase |
| 211 | VN1R2 | Q8NFZ6 | Vomeronasal type-1 receptor 2 |
| 212 | VPS26A | O75436 | Vacuolar protein sorting-associated protein 26A |
| 213 | VWF | P04275 | von Willebrand factor |
| 214 | ZBTB25 | P24278 | Zinc finger and BTB domain-containing protein 25 |
| 215 | ZBTB48 | P10074 | Telomere zinc finger-associated protein |
| 216 | ZNF141 | Q15928 | Zinc finger protein 141 |
| 217 | ZNF16 | P17020 | Zinc finger protein 16 |
| 218 | ZNF19 | P17023 | Zinc finger protein 19 |
| 219 | ZNF219 | Q9P2Y4 | Zinc finger protein 219 |
| 220 | ZNF384 | Q8TF68 | Zinc finger protein 384 |
| 221 | ZNF438 | Q7Z4V0 | Zinc finger protein 438 |
| 222 | ZNF565 | Q8N9K5 | Zinc finger protein 565 |
| 223 | ZNF616 | Q08AN1 | Zinc finger protein 616 |
| 224 | ZNF662 | Q6ZS27 | Zinc finger protein 662 |
| 225 | ZNF678 | Q5SXM1 | Zinc finger protein 678 |
| 226 | ZNF692 | Q9BU19 | Zinc finger protein 692 |
Confirmed traffic hits & Hodos2020 core gene signatures
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ACSBG2 | Q5FVE4 | Long-chain-fatty-acid–CoA ligase ACSBG2 |
| 2 | ADAMTS6 | Q9UKP5 | A disintegrin and metalloproteinase with thrombospondin motifs 6 |
| 3 | APOB | P04114 | Apolipoprotein B-100 |
| 4 | ARVCF | O00192 | Armadillo repeat protein deleted in velo-cardio-facial syndrome |
| 5 | CCL27 | Q9Y4X3 | C-C motif chemokine 27 |
| 6 | CITED2 | Q99967 | Cbp/p300-interacting transactivator 2 |
| 7 | CLDN4 | O14493 | Claudin-4 |
| 8 | COL5A1 | P20908 | Collagen alpha-1(V) chain |
| 9 | CPZ | Q66K79 | Carboxypeptidase Z |
| 10 | CREBBP | Q92793 | CREB-binding protein |
| 11 | CYSLTR2 | Q9NS75 | Cysteinyl leukotriene receptor 2 |
| 12 | DCSTAMP | Q9H295 | Dendritic cell-specific transmembrane protein |
| 13 | EVI5L | Q96CN4 | EVI5-like protein |
| 14 | FOLR2 | P14207 | Folate receptor beta |
| 15 | GJB2 | P29033 | Gap junction beta-2 protein |
| 16 | GUCY2D | Q02846 | Retinal guanylyl cyclase 1 |
| 17 | HELLS | Q9NRZ9 | Lymphoid-specific helicase |
| 18 | IL24 | Q13007 | Interleukin-24 |
| 19 | ISL2 | Q96A47 | Insulin gene enhancer protein ISL-2 |
| 20 | KIF17 | Q9P2E2 | Kinesin-like protein KIF17 |
| 21 | LDLRAD3 | Q86YD5 | Low-density lipoprotein receptor class A domain-containing protein 3 |
| 22 | LIN9 | Q5TKA1 | Protein lin-9 homolog |
| 23 | LRRK1 | Q38SD2 | Leucine-rich repeat serine/threonine-protein kinase 1 |
| 24 | NOVA1 | P51513 | RNA-binding protein Nova-1 |
| 25 | NTNG2 | Q96CW9 | Netrin-G2 |
| 26 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
| 27 | PCDHB2 | Q9Y5E7 | Protocadherin beta-2 |
| 28 | QRSL1 | Q9H0R6 | Glutamyl-tRNA(Gln) amidotransferase subunit A, mitochondrial |
| 29 | RECQL5 | O94762 | ATP-dependent DNA helicase Q5 |
| 30 | SLC30A1 | Q9Y6M5 | Zinc transporter 1 |
| 31 | STYK1 | Q6J9G0 | Tyrosine-protein kinase STYK1 |
| 32 | TPK1 | Q9H3S4 | Thiamin pyrophosphokinase 1 |
| 33 | VPS26A | O75436 | Vacuolar protein sorting-associated protein 26A |
| 34 | ZNF141 | Q15928 | Zinc finger protein 141 |
| 35 | ZNF384 | Q8TF68 | Zinc finger protein 384 |
Primary traffic hits & Hodos2020 all gene signatures
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | ABCB4 | P21439 | Phosphatidylcholine translocator ABCB4 |
| 2 | ABR | Q12979 | Active breakpoint cluster region-related protein |
| 3 | ADORA1 | P30542 | Adenosine receptor A1 |
| 4 | AKT2 | P31751 | RAC-beta serine/threonine-protein kinase |
| 5 | ASMT | P46597 | Acetylserotonin O-methyltransferase |
| 6 | ATP1B3 | P54709 | Sodium/potassium-transporting ATPase subunit beta-3 |
| 7 | AURKA | O14965 | Aurora kinase A |
| 8 | BAG3 | O95817 | BAG family molecular chaperone regulator 3 |
| 9 | BANK1 | Q8NDB2 | B-cell scaffold protein with ankyrin repeats |
| 10 | BCL6B | Q8N143 | B-cell CLL/lymphoma 6 member B protein |
| 11 | BLK | P51451 | Tyrosine-protein kinase Blk |
| 12 | C3AR1 | Q16581 | C3a anaphylatoxin chemotactic receptor |
| 13 | CACNA1C | Q13936 | Voltage-dependent L-type calcium channel subunit alpha-1C |
| 14 | CCL27 | Q9Y4X3 | C-C motif chemokine 27 |
| 15 | CCR8 | P51685 | C-C chemokine receptor type 8 |
| 16 | CCT5 | P48643 | T-complex protein 1 subunit epsilon |
| 17 | CDH2 | P19022 | Cadherin-2 |
| 18 | CHRM1 | P11229 | Muscarinic acetylcholine receptor M1 |
| 19 | CLDN4 | O14493 | Claudin-4 |
| 20 | CLK1 | P49759 | Dual specificity protein kinase CLK1 |
| 21 | CNR2 | P34972 | Cannabinoid receptor 2 |
| 22 | COL5A1 | P20908 | Collagen alpha-1(V) chain |
| 23 | CUL5 | Q93034 | Cullin-5 |
| 24 | CYSLTR2 | Q9NS75 | Cysteinyl leukotriene receptor 2 |
| 25 | DAB2 | P98082 | Disabled homolog 2 |
| 26 | DDR2 | Q16832 | Discoidin domain-containing receptor 2 |
| 27 | DDX5 | P17844 | Probable ATP-dependent RNA helicase DDX5 |
| 28 | DGKG | P49619 | Diacylglycerol kinase gamma |
| 29 | DGUOK | Q16854 | Deoxyguanosine kinase, mitochondrial |
| 30 | DPEP2 | Q9H4A9 | Dipeptidase 2 |
| 31 | DRD4 | P21917 | D(4) dopamine receptor |
| 32 | DRD5 | P21918 | D(1B) dopamine receptor |
| 33 | DYRK3 | O43781 | Dual specificity tyrosine-phosphorylation-regulated kinase 3 |
| 34 | EPN3 | Q9H201 | Epsin-3 |
| 35 | EVI5L | Q96CN4 | EVI5-like protein |
| 36 | EZH1 | Q92800 | Histone-lysine N-methyltransferase EZH1 |
| 37 | F5 | P12259 | Coagulation factor V |
| 38 | FER | P16591 | Tyrosine-protein kinase Fer |
| 39 | FGL1 | Q08830 | Fibrinogen-like protein 1 |
| 40 | FOLR2 | P14207 | Folate receptor beta |
| 41 | FRK | P42685 | Tyrosine-protein kinase FRK |
| 42 | FSD1 | Q9BTV5 | Fibronectin type III and SPRY domain-containing protein 1 |
| 43 | GALK2 | Q01415 | N-acetylgalactosamine kinase |
| 44 | GALNS | P34059 | N-acetylgalactosamine-6-sulfatase |
| 45 | GDPD5 | Q8WTR4 | Glycerophosphodiester phosphodiesterase domain-containing protein 5 |
| 46 | GJA8 | P48165 | Gap junction alpha-8 protein |
| 47 | GJB2 | P29033 | Gap junction beta-2 protein |
| 48 | GLP1R | P43220 | Glucagon-like peptide 1 receptor |
| 49 | GOSR2 | O14653 | Golgi SNAP receptor complex member 2 |
| 50 | GPR132 | Q9UNW8 | Probable G-protein coupled receptor 132 |
| 51 | GPR162 | Q16538 | Probable G-protein coupled receptor 162 |
| 52 | GRK3 | P35626 | Beta-adrenergic receptor kinase 2 |
| 53 | GRK5 | P34947 | G protein-coupled receptor kinase 5 |
| 54 | GUCY2D | Q02846 | Retinal guanylyl cyclase 1 |
| 55 | HS2ST1 | Q7LGA3 | Heparan sulfate 2-O-sulfotransferase 1 |
| 56 | HTR1E | P28566 | 5-hydroxytryptamine receptor 1E |
| 57 | IL24 | Q13007 | Interleukin-24 |
| 58 | ITGA11 | Q9UKX5 | Integrin alpha-11 |
| 59 | ITPK1 | Q13572 | Inositol-tetrakisphosphate 1-kinase |
| 60 | JAG2 | Q9Y219 | Protein jagged-2 |
| 61 | KCNIP2 | Q9NS61 | Kv channel-interacting protein 2 |
| 62 | KCNK10 | P57789 | Potassium channel subfamily K member 10 |
| 63 | LAMA3 | Q16787 | Laminin subunit alpha-3 |
| 64 | LAMB1 | P07942 | Laminin subunit beta-1 |
| 65 | LAMB2 | P55268 | Laminin subunit beta-2 |
| 66 | LDLRAD3 | Q86YD5 | Low-density lipoprotein receptor class A domain-containing protein 3 |
| 67 | LEPROT | O15243 | Leptin receptor gene-related protein |
| 68 | LMTK3 | Q96Q04 | Serine/threonine-protein kinase LMTK3 |
| 69 | LRP1B | Q9NZR2 | Low-density lipoprotein receptor-related protein 1B |
| 70 | LRRK1 | Q38SD2 | Leucine-rich repeat serine/threonine-protein kinase 1 |
| 71 | MAK | P20794 | Serine/threonine-protein kinase MAK |
| 72 | MC5R | P33032 | Melanocortin receptor 5 |
| 73 | MFHAS1 | Q9Y4C4 | Malignant fibrous histiocytoma-amplified sequence 1 |
| 74 | MMS19 | Q96T76 | MMS19 nucleotide excision repair protein homolog |
| 75 | MPST | P25325 | 3-mercaptopyruvate sulfurtransferase |
| 76 | MYH14 | Q7Z406 | Myosin-14 |
| 77 | NAALADL1 | Q9UQQ1 | Aminopeptidase NAALADL1 |
| 78 | NCBP2 | P52298 | Nuclear cap-binding protein subunit 2 |
| 79 | NOVA1 | P51513 | RNA-binding protein Nova-1 |
| 80 | NR2E1 | Q9Y466 | Nuclear receptor subfamily 2 group E member 1 |
| 81 | NRDC | O43847 | Nardilysin |
| 82 | NTNG2 | Q96CW9 | Netrin-G2 |
| 83 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
| 84 | PABPC3 | Q9H361 | Polyadenylate-binding protein 3 |
| 85 | PAFAH2 | Q99487 | Platelet-activating factor acetylhydrolase 2, cytoplasmic |
| 86 | PAK6 | Q9NQU5 | Serine/threonine-protein kinase PAK 6 |
| 87 | PARD3 | Q8TEW0 | Partitioning defective 3 homolog |
| 88 | PARG | Q86W56 | Poly(ADP-ribose) glycohydrolase |
| 89 | PCDH17 | O14917 | Protocadherin-17 |
| 90 | PCDH19 | Q8TAB3 | Protocadherin-19 |
| 91 | PCDHB2 | Q9Y5E7 | Protocadherin beta-2 |
| 92 | PCDHB8 | Q9UN66 | Protocadherin beta-8 |
| 93 | PDE8A | O60658 | High affinity cAMP-specific and IBMX-insensitive 3',5'-cyclic phosphodiesterase 8A |
| 94 | PHKG1 | Q16816 | Phosphorylase b kinase gamma catalytic chain, skeletal muscle/heart isoform |
| 95 | PKMYT1 | Q99640 | Membrane-associated tyrosine- and threonine-specific cdc2-inhibitory kinase |
| 96 | PLA2G12A | Q9BZM1 | Group XIIA secretory phospholipase A2 |
| 97 | PLCZ1 | Q86YW0 | 1-phosphatidylinositol 4,5-bisphosphate phosphodiesterase zeta-1 |
| 98 | PLK2 | Q9NYY3 | Serine/threonine-protein kinase PLK2 |
| 99 | PRSS12 | P56730 | Neurotrypsin |
| 100 | PSMA6 | P60900 | Proteasome subunit alpha type-6 |
| 101 | PSMB1 | P20618 | Proteasome subunit beta type-1 |
| 102 | PTGER4 | P35408 | Prostaglandin E2 receptor EP4 subtype |
| 103 | RAB4B | P61018 | Ras-related protein Rab-4B |
| 104 | RB1 | P06400 | Retinoblastoma-associated protein |
| 105 | RECQL5 | O94762 | ATP-dependent DNA helicase Q5 |
| 106 | RELA | Q04206 | Transcription factor p65 |
| 107 | RHCE | P18577 | Blood group Rh(CE) polypeptide |
| 108 | SELE | P16581 | E-selectin |
| 109 | SEMA6B | Q9H3T3 | Semaphorin-6B |
| 110 | SIPA1 | Q96FS4 | Signal-induced proliferation-associated protein 1 |
| 111 | SLC13A1 | Q9BZW2 | Solute carrier family 13 member 1 |
| 112 | SLC14A1 | Q13336 | Urea transporter 1 |
| 113 | SLC17A1 | Q14916 | Sodium-dependent phosphate transport protein 1 |
| 114 | SLC2A13 | Q96QE2 | Proton myo-inositol cotransporter |
| 115 | SLC30A1 | Q9Y6M5 | Zinc transporter 1 |
| 116 | SLC34A1 | Q06495 | Sodium-dependent phosphate transport protein 2A |
| 117 | SLC37A4 | O43826 | Glucose-6-phosphate exchanger SLC37A4 |
| 118 | SLC39A1 | Q9NY26 | Zinc transporter ZIP1 |
| 119 | SLC5A6 | Q9Y289 | Sodium-dependent multivitamin transporter |
| 120 | SLC6A15 | Q9H2J7 | Sodium-dependent neutral amino acid transporter B(0)AT2 |
| 121 | SLC9A4 | Q6AI14 | Sodium/hydrogen exchanger 4 |
| 122 | SNX6 | Q9UNH7 | Sorting nexin-6 |
| 123 | STK32C | Q86UX6 | Serine/threonine-protein kinase 32C |
| 124 | STYK1 | Q6J9G0 | Tyrosine-protein kinase STYK1 |
| 125 | TPCN1 | Q9ULQ1 | Two pore calcium channel protein 1 |
| 126 | TPK1 | Q9H3S4 | Thiamin pyrophosphokinase 1 |
| 127 | TRIM68 | Q6AZZ1 | E3 ubiquitin-protein ligase TRIM68 |
| 128 | TRUB1 | Q8WWH5 | Probable tRNA pseudouridine synthase 1 |
| 129 | UBE2I | P63279 | SUMO-conjugating enzyme UBC9 |
| 130 | UNC5B | Q8IZJ1 | Netrin receptor UNC5B |
| 131 | UQCRC1 | P31930 | Cytochrome b-c1 complex subunit 1, mitochondrial |
| 132 | UROD | P06132 | Uroporphyrinogen decarboxylase |
| 133 | VN1R2 | Q8NFZ6 | Vomeronasal type-1 receptor 2 |
| 134 | VWF | P04275 | von Willebrand factor |
| 135 | ZBTB25 | P24278 | Zinc finger and BTB domain-containing protein 25 |
Confirmed traffic hits & Hodos2020 all gene signatures
| id | symbol | uniprot | name |
|---|---|---|---|
| 1 | CCL27 | Q9Y4X3 | C-C motif chemokine 27 |
| 2 | CLDN4 | O14493 | Claudin-4 |
| 3 | COL5A1 | P20908 | Collagen alpha-1(V) chain |
| 4 | CYSLTR2 | Q9NS75 | Cysteinyl leukotriene receptor 2 |
| 5 | EVI5L | Q96CN4 | EVI5-like protein |
| 6 | FOLR2 | P14207 | Folate receptor beta |
| 7 | GJB2 | P29033 | Gap junction beta-2 protein |
| 8 | GUCY2D | Q02846 | Retinal guanylyl cyclase 1 |
| 9 | IL24 | Q13007 | Interleukin-24 |
| 10 | LDLRAD3 | Q86YD5 | Low-density lipoprotein receptor class A domain-containing protein 3 |
| 11 | LRRK1 | Q38SD2 | Leucine-rich repeat serine/threonine-protein kinase 1 |
| 12 | NOVA1 | P51513 | RNA-binding protein Nova-1 |
| 13 | NTNG2 | Q96CW9 | Netrin-G2 |
| 14 | OXSR1 | O95747 | Serine/threonine-protein kinase OSR1 |
| 15 | PCDHB2 | Q9Y5E7 | Protocadherin beta-2 |
| 16 | RECQL5 | O94762 | ATP-dependent DNA helicase Q5 |
| 17 | SLC30A1 | Q9Y6M5 | Zinc transporter 1 |
| 18 | STYK1 | Q6J9G0 | Tyrosine-protein kinase STYK1 |
| 19 | TPK1 | Q9H3S4 | Thiamin pyrophosphokinase 1 |
Mol Syst Biol (2022) 18(2):e10629
CFTR interactome mapping using the mammalian membrane two-hybrid
high-throughput screening system
Sang Hyun Lim, Jamie Snider, Liron Birimberg-Schwartz, Wan Ip, Joana C
Serralha, Hugo M Botelho, Miquéias Lopes-Pacheco, Madalena C Pinto,
Mohamed Taha Moutaoufik, Mara Zilocchi, Onofrio Laselva, Mohsen
Esmaeili, Max Kotlyar, Anna Lyakisheva, Priscilla Tang, Lucía López
Vázquez, Indira Akula, Farzaneh Aboualizadeh, Victoria Wong, Ingrid
Grozavu, Teuta Opacak-Bernardi, Zhong Yao, Meg Mendoza, Mohan Babu, Igor
Jurisica, Tanja Gonska, Christine E Bear, Margarida D Amaral, Igor
Stagljar
http://dx.doi.org/10.15252/msb.202110629
This study applied a high-throughput screening variant of the Mammalian Membrane Two-Hybrid (MaMTH-HTS) to map the protein-protein interactions of wild-type (wt) and mutant (F508del) CFTR in HEK293 cells. It uncovered candidate proteins with potential roles in CFTR function/CF pathophysiology, including Fibrinogen Like 2 (FGL2).
wt- and F508del-CFTR had 178 and 223 unique interactions, respectively, as well as an overlap of 46 interactors (~10.2%).
To be done
# Concatenate all datasets
# Builds one long table (one row per gene per dataset) from the filtered
# per-dataset tables, or reloads it from the cache file when that file exists.
if (file.exists(path_intfull)) {
  alldata <- read_csv(path_intfull)
} else {
  alldataL <- list(
    extdrg = extdrgF,
    extdrg_prihits = extdrg_prihitsF,
    extdrg_cnfhits = extdrg_cnfhitsF,
    pankow2015_core = pankow2015_coreF,
    pankow2015_wt = pankow2015_wtF,
    pankow2015_df = pankow2015_dfF,
    canato2018_df = canato2018_dfF,
    canato2018_4RK = canato2018_4RKF,
    santos2019_wt = santos2019_wtF,
    santos2019_DDAA = santos2019_DDAAF,
    hutt2018_wt = hutt2018_wtF,
    hutt2018_df = hutt2018_dfF,
    # rauniyar
    almaca2013 = almaca2013F,
    tomati2018 = tomati2018F,
    simpson2012 = simpson2012F,
    wang2006_wt = wang2006_wtF,
    wang2006_df = wang2006_dfF,
    reilly2017_wt = reilly2017_wtF,
    reilly2017_df = reilly2017_dfF,
    # gilchrist
    pankow2019 = pankow2019F,
    dang2020 = dang2020F,
    hodos2020_deg = hodos2020_degF,
    hodos2020_cosig = hodos2020_cosigF,
    hodos2020_allsig = hodos2020_allsigF
    # lim2022
  )
  alldata <- lapply(names(alldataL), function(x) {
    temp <- alldataL[[x]]
    # Datasets without a score column get a missing (numeric) score so that
    # every table exposes the same set of columns before binding.
    if (!"score" %in% names(temp)) temp$score <- NA_real_
    temp %>%
      mutate(dataset = x) %>%
      select(symbol, uniprot, ensembl, entrez, name, score, dataset)
  })
  alldata <- do.call(rbind, alldata)
  # row.names = FALSE: otherwise the cache gains an index column that
  # read_csv() returns as an extra column on the next run, so the cached and
  # freshly computed tables would differ.
  write.csv(alldata, path_intfull, row.names = FALSE)
}
# Summary: all genes, per dataset
# Which gene comes in which dataset
# Result: one row per (symbol, uniprot) pair plus one logical column per
# dataset telling whether that uniprot ID occurs in the dataset.
if (file.exists(path_intsummary)) {
  datsum <- read_csv(path_intsummary)
} else {
  # One gene per line. distinct() works on the columns directly, so missing
  # symbols stay NA instead of being pasted into the string "NA" and split
  # back apart; arrange() keeps the rows in a deterministic sorted order.
  datsum <- alldata %>%
    distinct(symbol, uniprot) %>%
    arrange(symbol, uniprot)
  # Find genes in each dataset
  for (ds in sort(unique(alldata$dataset))) {
    ups <- alldata %>%
      filter(dataset == ds) %>%
      pull(uniprot)
    # Vectorised membership test (one unnamed logical per row of datsum)
    bool <- datsum$uniprot %in% ups
    datsum <- mutate(datsum, "{ds}" := bool)
  }
  write.csv(datsum, path_intsummary, row.names = FALSE)
}
datatable(datsum)
# head(datsum,20)
######################################################
# Subset and sort datasets for Circos plot
######################################################
# Sorting: place all confirmed hits at the beginning of each data frame.
# This should remove much of the spider web effect on the circos plot.
selected_datasets <- c("extdrg_prihits", "extdrg_cnfhits", "pankow2015_df", "almaca2013", "tomati2018", "simpson2012", "reilly2017_df", "dang2020")
# selected_datasets <- c("extdrg_prihits", "extdrg_cnfhits", "pankow2015_df", "almaca2013")
key_genes <- c("DGKG", "GRK5", "LRRK1", "STYK1", "TPK1")
# Colour scale limits: lowest score among the two screen datasets (missing
# scores are ignored so a single NA cannot wipe out the lower limit) and a
# fixed upper limit of 3.
LUT_minmax <- c(
  alldata %>%
    filter(dataset %in% c("extdrg_prihits", "extdrg_cnfhits")) %>%
    pull(score) %>%
    min(na.rm = TRUE),
  3
)
# Confirmed screen hits: sorted by decreasing score; x is the position of the
# gene within its sector.
c_extdrg_cnfhits <- extdrg_cnfhitsF %>%
  mutate(dataset = "extdrg_cnfhits") %>%
  arrange(desc(score)) %>%
  tibble::rowid_to_column("x") %>%
  mutate(color = numeric_to_2color(score, colors = c("red", "green"), lutminmax = LUT_minmax, ncolors = 1000)) %>%
  select(symbol, uniprot, score, color, dataset, x)
# Primary screen hits (arrange according to confirmed hits)
# Genes absent from the confirmed hits have rank NA, which arrange() puts
# last; ties are broken alphabetically by symbol.
c_extdrg_prihits <- extdrg_prihitsF %>%
  mutate(dataset = "extdrg_prihits") %>%
  mutate(rank = match(uniprot, c_extdrg_cnfhits$uniprot)) %>%
  arrange(rank, symbol) %>%
  tibble::rowid_to_column("x") %>%
  mutate(color = numeric_to_2color(score, colors = c("red", "green"), lutminmax = LUT_minmax, ncolors = 1000)) %>%
  select(symbol, uniprot, score, color, dataset, x)
# Other datasets: every selected dataset except the two screen datasets,
# picked by name so the result does not depend on their position in
# selected_datasets. Genes are ordered like the primary hits.
c_others <- lapply(
  setdiff(selected_datasets, c("extdrg_prihits", "extdrg_cnfhits")),
  function(ds) {
    alldata %>%
      filter(dataset == ds) %>%
      mutate(rank = match(uniprot, c_extdrg_prihits$uniprot)) %>%
      arrange(rank, symbol) %>%
      tibble::rowid_to_column("x") %>%
      # No score-based colour here; presumably numeric_to_2color(NA) yields a
      # missing colour (TODO confirm against the helper).
      mutate(color = numeric_to_2color(NA)) %>%
      select(symbol, uniprot, score, color, dataset, x)
  }
)
c_others <- do.call(rbind, c_others)
# combination
c_all <- do.call(rbind, list(c_extdrg_prihits, c_extdrg_cnfhits, c_others))
######################################################
# Find genes co-occurring in 2 datasets
######################################################
# Find pairwise dataset/sector combinations
# (2-row matrix, one column per unordered pair of selected datasets)
sector_combinations <- combn(selected_datasets, 2)
# Find shared genes
# For each pair, keep the genes of the first dataset whose uniprot ID also
# occurs in the second, and record the position of the gene in both sectors
# (x1, x2).
sharedg <- lapply(seq_len(ncol(sector_combinations)), function(j) {
  ds1 <- filter(c_all, dataset == sector_combinations[1, j])
  ds2 <- filter(c_all, dataset == sector_combinations[2, j])
  ds1 %>%
    filter(uniprot %in% ds2$uniprot) %>%
    rename(dataset1 = dataset, x1 = x) %>%
    mutate(
      dataset2 = sector_combinations[2, j],
      x2 = ds2$x[match(uniprot, ds2$uniprot)]
    )
})
sharedg <- do.call(rbind, sharedg)
# Confirmed hits shared with any dataset other than the primary hits
sharedg_cnfhits <- filter(
  sharedg,
  (dataset1 == "extdrg_cnfhits" & dataset2 != "extdrg_prihits") |
    (dataset2 == "extdrg_cnfhits" & dataset1 != "extdrg_prihits")
)
# Primary hits shared with any dataset other than the confirmed hits
sharedg_prihits <- filter(
  sharedg,
  (dataset1 == "extdrg_prihits" & dataset2 != "extdrg_cnfhits") |
    (dataset2 == "extdrg_prihits" & dataset1 != "extdrg_cnfhits")
)
# Genes shared between two datasets that are both outside the screen
sharedg_others <- filter(
  sharedg,
  !dataset1 %in% c("extdrg_prihits", "extdrg_cnfhits") &
    !dataset2 %in% c("extdrg_prihits", "extdrg_cnfhits")
)
# Key genes among the primary-hit overlaps
sharedg_main <- filter(sharedg, symbol %in% key_genes) %>%
  filter(
    (dataset1 == "extdrg_prihits" & dataset2 != "extdrg_cnfhits") |
      (dataset2 == "extdrg_prihits" & dataset1 != "extdrg_cnfhits")
  )
# Start circos
# Confirmed hits go to the inner track; all other datasets define the sectors.
c_track1 <- filter(c_all, dataset != "extdrg_cnfhits")
c_track2 <- filter(c_all, dataset == "extdrg_cnfhits")
circos.clear()
circos.par(gap.degree = 1)
circos.par(cell.padding = c(0.02, 1.00, 0.02, 1.00))
circos.initialize(c_track1$dataset, x = c_track1$x)
# Outer track - Primary hits
# (one sector per dataset in c_track1)
circos.track(ylim = c(0, 1))
# Add ticks and gene names
tdatasets <- unique(c_track1$dataset)
for (sector.index in tdatasets) {
  secdata <- filter(c_track1, dataset == sector.index)
  # Add labels
  # The dataset name is centred over the sector at y = 2, i.e. outside the
  # track's ylim of c(0, 1).
  xmid <- mean(range(secdata$x))
  circos.text(xmid, 2, sector.index, facing = "bending.inside", col = "darkred", sector.index = sector.index)
  for (i in seq_len(nrow(secdata))) {
    x <- secdata$x[i]
    lbl <- secdata$symbol[i]
    color <- secdata$color[i]
    # Genes without a score-based colour are drawn in dark gray
    if (is.na(color)) color <- "darkgray"
    circos.segments(x, 0, x, 1, col = color, lwd = 1, sector.index = sector.index)
    circos.text(x, 1, lbl, facing = "clockwise", niceFacing = TRUE, adj = c(-0.5, .8), cex = .2, sector.index = sector.index)
  }
}
# Inner track - confirmed hits
# Confirmed hits are mapped within the "extdrg_prihits" sector, since they overlap
circos.track(ylim = c(0, 1), track.height = .15)
# Add ticks (no gene names are drawn on this track)
sector.index <- "extdrg_prihits"
secdata <- c_track2
# seq_len() so that an empty confirmed-hit table draws nothing instead of
# iterating over c(1, 0).
for (i in seq_len(nrow(secdata))) {
  x <- secdata$x[i]
  color <- secdata$color[i]
  # Hits without a score-based colour are drawn in dark gray
  if (is.na(color)) color <- "darkgray"
  circos.segments(x, 0, x, 1, col = color, lwd = 1, sector.index = sector.index)
}
# Link genes
# Each loop handles one class of shared genes: it draws a link between the
# two sectors and a tick at both ends. The loops use seq_len(nrow(...)) so
# that an empty table is skipped; 1:nrow() would iterate over c(1, 0) and
# fail on the resulting missing sectors/positions.
# Among non-traffic hits
for (i in seq_len(nrow(sharedg_others))) {
  sector1 <- sharedg_others$dataset1[i]
  sector2 <- sharedg_others$dataset2[i]
  x1 <- sharedg_others$x1[i]
  x2 <- sharedg_others$x2[i]
  circos.link(sector1, x1, sector2, x2, col = "gray", lwd = .5)
  circos.segments(x1, 0, x1, 1, col = "gray", lwd = .5, sector.index = sector1)
  circos.segments(x2, 0, x2, 1, col = "gray", lwd = .5, sector.index = sector2)
}
# Among non-confirmed hits and other datasets
for (i in seq_len(nrow(sharedg_prihits))) {
  sector1 <- sharedg_prihits$dataset1[i]
  sector2 <- sharedg_prihits$dataset2[i]
  x1 <- sharedg_prihits$x1[i]
  x2 <- sharedg_prihits$x2[i]
  circos.link(sector1, x1, sector2, x2, col = "darkred")
  # "extdrg_cnfhits" is not a sector of the plot, so never draw a tick there
  if (sector1 != "extdrg_cnfhits") circos.segments(x1, 0, x1, 1, col = "darkred", lwd = 1, sector.index = sector1)
  if (sector2 != "extdrg_cnfhits") circos.segments(x2, 0, x2, 1, col = "darkred", lwd = 1, sector.index = sector2)
}
# Among confirmed hits and other datasets
for (i in seq_len(nrow(sharedg_cnfhits))) {
  sector1 <- sharedg_cnfhits$dataset1[i]
  sector2 <- sharedg_cnfhits$dataset2[i]
  # Confirmed hits are drawn inside the "extdrg_prihits" sector
  if (sector1 == "extdrg_cnfhits") sector1 <- "extdrg_prihits"
  if (sector2 == "extdrg_cnfhits") sector2 <- "extdrg_prihits"
  x1 <- sharedg_cnfhits$x1[i]
  x2 <- sharedg_cnfhits$x2[i]
  circos.link(sector1, x1, sector2, x2, col = "red", lwd = 2)
  # After the remapping above neither sector can still be "extdrg_cnfhits",
  # so both ticks are always drawn.
  circos.segments(x1, 0, x1, 1, col = "red", lwd = 2, sector.index = sector1)
  circos.segments(x2, 0, x2, 1, col = "red", lwd = 2, sector.index = sector2)
}
# Among main hits
for (i in seq_len(nrow(sharedg_main))) {
  sector1 <- sharedg_main$dataset1[i]
  sector2 <- sharedg_main$dataset2[i]
  x1 <- sharedg_main$x1[i]
  x2 <- sharedg_main$x2[i]
  circos.link(sector1, x1, sector2, x2, col = "red", lwd = 5)
  circos.segments(x1, 0, x1, 1, col = "red", lwd = 5, sector.index = sector1)
  circos.segments(x2, 0, x2, 1, col = "red", lwd = 5, sector.index = sector2)
}
# Highlight key genes
# Every occurrence of a key gene in the plotted sectors gets a yellow tick
# and a larger label. seq_len() skips the loop when no key gene is present.
alldataC_key <- filter(c_track1, symbol %in% key_genes)
for (i in seq_len(nrow(alldataC_key))) {
  x <- alldataC_key$x[i]
  sector <- alldataC_key$dataset[i]
  lbl <- alldataC_key$symbol[i]
  circos.segments(x, 0, x, 1, col = "yellow", lwd = 1, sector.index = sector)
  circos.text(x, 1, lbl, facing = "clockwise", niceFacing = TRUE, adj = c(2.5, 0.3), cex = .5, sector.index = sector)
}
######################################################
# Subset and sort datasets for Circos plot
######################################################
# Sorting: place all confirmed hits at the beginning of each data frame.
# This should remove much of the spider web effect on the circos plot.
selected_datasets <- c("extdrg_prihits", "extdrg_cnfhits", "pankow2015_df", "almaca2013", "canato2018_df", "tomati2018", "simpson2012", "reilly2017_df", "dang2020")
# selected_datasets <- c("extdrg_prihits", "extdrg_cnfhits", "pankow2015_df", "almaca2013")
key_genes <- c("DGKG", "GRK5", "LRRK1", "STYK1", "TPK1")
# Colour scale limits: lowest score among the two screen datasets (missing
# scores are ignored so a single NA cannot wipe out the lower limit) and a
# fixed upper limit of 3.
LUT_minmax <- c(
  alldata %>%
    filter(dataset %in% c("extdrg_prihits", "extdrg_cnfhits")) %>%
    pull(score) %>%
    min(na.rm = TRUE),
  3
)
# Confirmed screen hits: sorted by decreasing score; x is the position of the
# gene within its sector.
c_extdrg_cnfhits <- extdrg_cnfhitsF %>%
  mutate(dataset = "extdrg_cnfhits") %>%
  arrange(desc(score)) %>%
  tibble::rowid_to_column("x") %>%
  mutate(color = numeric_to_2color(score, colors = c("red", "green"), lutminmax = LUT_minmax, ncolors = 1000)) %>%
  select(symbol, uniprot, score, color, dataset, x)
# Primary screen hits (arrange according to confirmed hits)
# Genes absent from the confirmed hits have rank NA, which arrange() puts
# last; ties are broken alphabetically by symbol.
c_extdrg_prihits <- extdrg_prihitsF %>%
  mutate(dataset = "extdrg_prihits") %>%
  mutate(rank = match(uniprot, c_extdrg_cnfhits$uniprot)) %>%
  arrange(rank, symbol) %>%
  tibble::rowid_to_column("x") %>%
  mutate(color = numeric_to_2color(score, colors = c("red", "green"), lutminmax = LUT_minmax, ncolors = 1000)) %>%
  select(symbol, uniprot, score, color, dataset, x)
# Other datasets: every selected dataset except the two screen datasets,
# picked by name so the result does not depend on their position in
# selected_datasets. Genes are ordered like the primary hits.
c_others <- lapply(
  setdiff(selected_datasets, c("extdrg_prihits", "extdrg_cnfhits")),
  function(ds) {
    alldata %>%
      filter(dataset == ds) %>%
      mutate(rank = match(uniprot, c_extdrg_prihits$uniprot)) %>%
      arrange(rank, symbol) %>%
      tibble::rowid_to_column("x") %>%
      # No score-based colour here; presumably numeric_to_2color(NA) yields a
      # missing colour (TODO confirm against the helper).
      mutate(color = numeric_to_2color(NA)) %>%
      select(symbol, uniprot, score, color, dataset, x)
  }
)
c_others <- do.call(rbind, c_others)
# combination
c_all <- do.call(rbind, list(c_extdrg_prihits, c_extdrg_cnfhits, c_others))
######################################################
# Find genes co-occurring in 2 datasets
######################################################
# Find pairwise dataset/sector combinations
# (2-row matrix, one column per unordered pair of selected datasets)
sector_combinations <- combn(selected_datasets, 2)
# Find shared genes
# For each pair, keep the genes of the first dataset whose uniprot ID also
# occurs in the second, and record the position of the gene in both sectors
# (x1, x2).
sharedg <- lapply(seq_len(ncol(sector_combinations)), function(j) {
  ds1 <- filter(c_all, dataset == sector_combinations[1, j])
  ds2 <- filter(c_all, dataset == sector_combinations[2, j])
  ds1 %>%
    filter(uniprot %in% ds2$uniprot) %>%
    rename(dataset1 = dataset, x1 = x) %>%
    mutate(
      dataset2 = sector_combinations[2, j],
      x2 = ds2$x[match(uniprot, ds2$uniprot)]
    )
})
sharedg <- do.call(rbind, sharedg)
# Confirmed hits shared with any dataset other than the primary hits
sharedg_cnfhits <- filter(
  sharedg,
  (dataset1 == "extdrg_cnfhits" & dataset2 != "extdrg_prihits") |
    (dataset2 == "extdrg_cnfhits" & dataset1 != "extdrg_prihits")
)
# Primary hits shared with any dataset other than the confirmed hits
sharedg_prihits <- filter(
  sharedg,
  (dataset1 == "extdrg_prihits" & dataset2 != "extdrg_cnfhits") |
    (dataset2 == "extdrg_prihits" & dataset1 != "extdrg_cnfhits")
)
# Genes shared between two datasets that are both outside the screen
sharedg_others <- filter(
  sharedg,
  !dataset1 %in% c("extdrg_prihits", "extdrg_cnfhits") &
    !dataset2 %in% c("extdrg_prihits", "extdrg_cnfhits")
)
# Key genes among the primary-hit overlaps
sharedg_main <- filter(sharedg, symbol %in% key_genes) %>%
  filter(
    (dataset1 == "extdrg_prihits" & dataset2 != "extdrg_cnfhits") |
      (dataset2 == "extdrg_prihits" & dataset1 != "extdrg_cnfhits")
  )
# Start circos
# The confirmed hits get no sector of their own: sectors are built from every
# dataset except "extdrg_cnfhits", which is kept aside for the inner track.
c_track2 <- filter(c_all, dataset == "extdrg_cnfhits")
c_track1 <- filter(c_all, dataset != "extdrg_cnfhits")
circos.clear()
circos.par(gap.degree = 1, cell.padding = c(0.02, 1.00, 0.02, 1.00))
circos.initialize(c_track1$dataset, x = c_track1$x)

# Outer track - Primary hits
circos.track(ylim = c(0, 1))

# Add ticks and gene names
tdatasets <- unique(c_track1$dataset)
for (sector.index in tdatasets) {
  secdata <- filter(c_track1, dataset == sector.index)

  # Sector label, centred on the sector's x range. y = 2 lies above the
  # track's ylim of c(0, 1).
  xmid <- mean(range(secdata$x))
  circos.text(
    xmid, 2, sector.index,
    facing = "bending.inside",
    col = "darkred",
    sector.index = sector.index
  )

  # One tick and one gene symbol per row of the sector
  for (row in seq_len(nrow(secdata))) {
    pos <- secdata$x[[row]]
    tick_col <- secdata$color[[row]]
    # Rows without a colour are drawn in dark gray
    if (is.na(tick_col)) {
      tick_col <- "darkgray"
    }
    circos.segments(
      pos, 0, pos, 1,
      col = tick_col, lwd = 1, sector.index = sector.index
    )
    circos.text(
      pos, 1, secdata$symbol[[row]],
      facing = "clockwise", niceFacing = TRUE,
      adj = c(-0.5, .8), cex = .2,
      sector.index = sector.index
    )
  }
}
# Inner track - confirmed hits
# Confirmed hits are mapped within the "extdrg_prihits" sector, since they
# overlap; their x positions are used as coordinates in that sector.
circos.track(ylim = c(0, 1), track.height = .15)

# Add ticks (one per confirmed hit; no gene names on this track)
sector.index <- "extdrg_prihits"
secdata <- c_track2
# seq_len() instead of 1:nrow(): with no confirmed hits, 1:0 would iterate
# over c(1, 0) and call circos.segments() with NA coordinates.
for (i in seq_len(nrow(secdata))) {
  x <- secdata[["x"]][i]
  color <- secdata[["color"]][i]
  # Rows without a colour are drawn in dark gray
  if (is.na(color)) {
    color <- "darkgray"
  }
  circos.segments(x, 0, x, 1, col = color, lwd = 1, sector.index = sector.index)
}
# Link genes

# Draw one table of shared genes: for every row, a link between position x1
# in sector dataset1 and position x2 in sector dataset2, followed by a tick
# (y from 0 to 1) at each end.
#
# Arguments:
#   links              data frame with columns dataset1, x1, dataset2, x2
#                      (a subset of `sharedg`); may have zero rows
#   col                colour used for the link and both ticks
#   tick_lwd           line width of the two ticks
#   link_lwd           line width of the link; defaults to par("lwd"), which
#                      is also the default of circos.link()
#   cnfhits_in_prihits if TRUE, an end that belongs to "extdrg_cnfhits" is
#                      drawn in the "extdrg_prihits" sector instead
#
# Returns NULL invisibly; called for its drawing side effects.
draw_shared_links <- function(links, col, tick_lwd, link_lwd = par("lwd"),
                              cnfhits_in_prihits = FALSE) {
  # seq_len() instead of 1:nrow(): an empty table must draw nothing, whereas
  # 1:0 would iterate over c(1, 0) and pass NA sectors to circos.link().
  for (i in seq_len(nrow(links))) {
    sector1 <- as.character(links[["dataset1"]][i])
    sector2 <- as.character(links[["dataset2"]][i])
    if (cnfhits_in_prihits) {
      if (sector1 == "extdrg_cnfhits") sector1 <- "extdrg_prihits"
      if (sector2 == "extdrg_cnfhits") sector2 <- "extdrg_prihits"
    }
    x1 <- links[["x1"]][i]
    x2 <- links[["x2"]][i]
    circos.link(sector1, x1, sector2, x2, col = col, lwd = link_lwd)
    # NOTE(review): the original loops guarded these ticks with
    # `sector != "extdrg_cnfhits"`, but that test was always TRUE (it ran
    # after the remapping above, or on tables that never contain
    # "extdrg_cnfhits"), so the ticks were always drawn. That behaviour is
    # kept; confirm whether skipping the confirmed-hit end was intended.
    circos.segments(x1, 0, x1, 1, col = col, lwd = tick_lwd, sector.index = sector1)
    circos.segments(x2, 0, x2, 1, col = col, lwd = tick_lwd, sector.index = sector2)
  }
  invisible(NULL)
}

# The tables are drawn from least to most prominent, so later links are
# painted over earlier ones.

# Among non-traffic hits
draw_shared_links(sharedg_others, col = "gray", tick_lwd = .5, link_lwd = .5)

# Among non-confirmed hits and other datasets
draw_shared_links(sharedg_prihits, col = "darkred", tick_lwd = 1)

# Among confirmed hits and other datasets
draw_shared_links(
  sharedg_cnfhits,
  col = "red", tick_lwd = 2, link_lwd = 2,
  cnfhits_in_prihits = TRUE
)

# Among main hits
draw_shared_links(sharedg_main, col = "red", tick_lwd = 5, link_lwd = 5)
# Highlight key genes
# Every occurrence of a key gene in the plotted sectors gets a yellow tick
# and a larger label with its symbol.
alldataC_key <- filter(c_track1, symbol %in% key_genes)
# seq_len() instead of 1:nrow(): if none of the key genes is present, 1:0
# would iterate over c(1, 0) and call the drawing functions with NA values.
for (i in seq_len(nrow(alldataC_key))) {
  x <- alldataC_key[["x"]][i]
  sector <- alldataC_key[["dataset"]][i]
  lbl <- alldataC_key[["symbol"]][i]
  circos.segments(x, 0, x, 1, col = "yellow", lwd = 1, sector.index = sector)
  circos.text(
    x, 1, lbl,
    facing = "clockwise", niceFacing = TRUE,
    adj = c(2.5, 0.3), cex = .5,
    sector.index = sector
  )
}